maxframe 0.1.0b5__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
maxframe/utils.py
ADDED
|
@@ -0,0 +1,1114 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import asyncio.events
|
|
16
|
+
import concurrent.futures
|
|
17
|
+
import contextvars
|
|
18
|
+
import dataclasses
|
|
19
|
+
import datetime
|
|
20
|
+
import enum
|
|
21
|
+
import functools
|
|
22
|
+
import hashlib
|
|
23
|
+
import importlib
|
|
24
|
+
import inspect
|
|
25
|
+
import io
|
|
26
|
+
import itertools
|
|
27
|
+
import numbers
|
|
28
|
+
import os
|
|
29
|
+
import pkgutil
|
|
30
|
+
import random
|
|
31
|
+
import struct
|
|
32
|
+
import sys
|
|
33
|
+
import threading
|
|
34
|
+
import time
|
|
35
|
+
import tokenize as pytokenize
|
|
36
|
+
import traceback
|
|
37
|
+
import types
|
|
38
|
+
import weakref
|
|
39
|
+
import zlib
|
|
40
|
+
from collections.abc import Hashable, Mapping
|
|
41
|
+
from contextlib import contextmanager
|
|
42
|
+
from typing import (
|
|
43
|
+
Any,
|
|
44
|
+
Awaitable,
|
|
45
|
+
Callable,
|
|
46
|
+
Dict,
|
|
47
|
+
Iterable,
|
|
48
|
+
List,
|
|
49
|
+
Optional,
|
|
50
|
+
Tuple,
|
|
51
|
+
Type,
|
|
52
|
+
TypeVar,
|
|
53
|
+
Union,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
import msgpack
|
|
57
|
+
import numpy as np
|
|
58
|
+
import pandas as pd
|
|
59
|
+
import traitlets
|
|
60
|
+
from tornado import httpclient, web
|
|
61
|
+
from tornado.simple_httpclient import HTTPTimeoutError
|
|
62
|
+
|
|
63
|
+
from ._utils import ( # noqa: F401 # pylint: disable=unused-import
|
|
64
|
+
NamedType,
|
|
65
|
+
Timer,
|
|
66
|
+
TypeDispatcher,
|
|
67
|
+
ceildiv,
|
|
68
|
+
get_user_call_point,
|
|
69
|
+
new_random_id,
|
|
70
|
+
register_tokenizer,
|
|
71
|
+
reset_id_random_seed,
|
|
72
|
+
to_binary,
|
|
73
|
+
to_str,
|
|
74
|
+
to_text,
|
|
75
|
+
tokenize,
|
|
76
|
+
tokenize_int,
|
|
77
|
+
)
|
|
78
|
+
from .lib.version import parse as parse_version
|
|
79
|
+
from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
|
|
80
|
+
|
|
81
|
+
# make flake8 happy by referencing these imports
|
|
82
|
+
NamedType = NamedType
|
|
83
|
+
TypeDispatcher = TypeDispatcher
|
|
84
|
+
tokenize = tokenize
|
|
85
|
+
register_tokenizer = register_tokenizer
|
|
86
|
+
ceildiv = ceildiv
|
|
87
|
+
reset_id_random_seed = reset_id_random_seed
|
|
88
|
+
new_random_id = new_random_id
|
|
89
|
+
get_user_call_point = get_user_call_point
|
|
90
|
+
# True when the ``CI`` environment variable is "1" or "true" (case-insensitive);
# an unset or empty variable is treated as "0".
_is_ci = (os.environ.get("CI") or "0").lower() in ("1", "true")
# ``release`` attribute of the parsed version string of the installed pandas.
pd_release_version: Tuple[int, ...] = parse_version(pd.__version__).release
|
|
92
|
+
|
|
93
|
+
# Expose ``NoDefault`` / ``no_default`` from pandas when available, otherwise
# define a local stand-in, and register aliases on ``pandas._libs.lib`` so
# that pickles referring to either name can be resolved.
try:
    from pandas._libs import lib as _pd__libs_lib
    from pandas._libs.lib import NoDefault, no_default

    # keep the original ``__reduce__`` of NoDefault's type so it can be delegated to
    _raw__reduce__ = type(NoDefault).__reduce__

    def _no_default__reduce__(self):
        # Every object except ``NoDefault`` itself uses the original reducer.
        if self is not NoDefault:
            return _raw__reduce__(self)
        else:  # pragma: no cover
            # ``NoDefault`` is reduced to an attribute lookup on pandas._libs.lib
            return getattr, (_pd__libs_lib, "NoDefault")

    if hasattr(_pd__libs_lib, "_NoDefault"):  # pragma: no cover
        # need to patch __reduce__ to make sure it can be properly unpickled
        type(NoDefault).__reduce__ = _no_default__reduce__
    else:
        # introduced in pandas 1.5.0 : register for pickle compatibility
        _pd__libs_lib._NoDefault = NoDefault
except ImportError:  # pragma: no cover

    # Fallback used when pandas does not provide NoDefault / no_default.
    class NoDefault(enum.Enum):
        no_default = "NO_DEFAULT"

        def __repr__(self) -> str:
            return "<no_default>"

    no_default = NoDefault.no_default

    try:
        # register for pickle compatibility
        from pandas._libs import lib as _pd__libs_lib

        _pd__libs_lib.NoDefault = NoDefault
    except (ImportError, AttributeError):
        # best effort only: the alias is skipped when it cannot be installed
        pass
|
|
128
|
+
|
|
129
|
+
try:
|
|
130
|
+
import pyarrow as pa
|
|
131
|
+
except ImportError:
|
|
132
|
+
pa = None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class classproperty:
    """Read-only descriptor whose value is computed from the owning class."""

    def __init__(self, f):
        # function taking the owner class as its only argument
        self.f = f

    def __get__(self, obj, owner):
        # the instance (if any) is ignored; only the class is handed to the getter
        getter = self.f
        return getter(owner)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def implements(f: Callable):
    """Return a decorator that copies the docstring of ``f`` onto the decorated object."""

    def decorator(g):
        # the docstring is read when the decorator is applied, not when it is built
        setattr(g, "__doc__", f.__doc__)
        return g

    return decorator
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class AttributeDict(dict):
    """Dict whose keys can also be read as attributes."""

    def __getattr__(self, item):
        # only reached when normal attribute lookup fails
        try:
            value = self[item]
        except KeyError:
            raise AttributeError(f"'AttributeDict' object has no attribute {item}")
        return value
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def on_serialize_shape(shape: Tuple[int]):
    """Replace NaN entries of ``shape`` with -1; falsy shapes are returned unchanged."""
    if not shape:
        return shape
    return tuple(-1 if np.isnan(dim) else dim for dim in shape)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def on_deserialize_shape(shape: Tuple[int]):
    """Replace -1 entries of ``shape`` with NaN; falsy shapes are returned unchanged."""
    if not shape:
        return shape
    return tuple(np.nan if dim == -1 else dim for dim in shape)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def on_serialize_numpy_type(value: np.dtype):
    """Convert ``pd.NaT`` to None and numpy scalars to Python scalars; pass others through."""
    if value is pd.NaT:
        return None
    if isinstance(value, np.generic):
        return value.item()
    return value
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def on_serialize_nsplits(value: Tuple[Tuple[int]]):
    """Replace NA entries in every dimension of ``value`` with None; None stays None."""
    if value is None:
        return None
    return tuple(
        tuple(None if pd.isna(size) else size for size in dim_splits)
        for dim_splits in value
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def has_unknown_shape(*tiled_tileables: TileableType) -> bool:
    """
    Tell whether any given tileable has a null entry in its shape or nsplits.

    Objects without a ``shape`` attribute (or whose shape is None) are ignored.
    """
    for tileable in tiled_tileables:
        if getattr(tileable, "shape", None) is None:
            continue
        # shape is inspected first; nsplits is only touched when shape is fully known
        if any(pd.isnull(dim) for dim in tileable.shape):
            return True
        flat_splits = itertools.chain(*tileable.nsplits)
        if any(pd.isnull(size) for size in flat_splits):
            return True
    return False
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def calc_nsplits(chunk_idx_to_shape: Dict[Tuple[int], Tuple[int]]) -> Tuple[Tuple[int]]:
    """
    Calculate a tiled entity's nsplits.

    Parameters
    ----------
    chunk_idx_to_shape : Dict type, {chunk_idx: chunk_shape}

    Returns
    -------
    nsplits
    """
    # the number of dimensions is taken from the first chunk index
    ndim = len(next(iter(chunk_idx_to_shape)))

    def _splits_along(axis):
        # collect sizes of chunks whose index is zero on every other dimension
        return tuple(
            shape[axis]
            for index, shape in chunk_idx_to_shape.items()
            if all(idx == 0 for pos, idx in enumerate(index) if pos != axis)
        )

    return tuple(_splits_along(axis) for axis in range(ndim))
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def copy_tileables(tileables: List[TileableType], **kwargs):
    """
    Create copies of tileables that share one operator.

    Accepted keyword arguments are ``inputs`` (inputs for the new tileables,
    falling back to the copied operator's inputs), ``copy_key`` and ``copy_id``
    (both default to True). Any other keyword raises TypeError, as do
    tileables produced by different operators.
    """
    inputs = kwargs.pop("inputs", None)
    copy_key = kwargs.pop("copy_key", True)
    copy_id = kwargs.pop("copy_id", True)
    if kwargs:
        raise TypeError(f"got un unexpected keyword argument '{next(iter(kwargs))}'")

    # cannot handle tileables with different operators here
    # try to copy separately if so
    if len(tileables) > 1 and len({t.op for t in tileables}) != 1:
        raise TypeError("All tileables' operators should be same.")

    source_op = tileables[0].op
    op = source_op.copy().reset_key()
    if copy_key:
        op._key = source_op.key

    kws = []
    for tileable in tileables:
        tileable_params = tileable.params.copy()
        if copy_key:
            tileable_params["_key"] = tileable.key
        if copy_id:
            tileable_params["_id"] = tileable.id
        # extra params are applied last so they win over key / id
        tileable_params.update(tileable.extra_params)
        kws.append(tileable_params)

    return op.new_tileables(inputs or op.inputs, kws=kws, output_limit=len(kws))
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
    """
    Build a fetch chunk carrying the params and key of ``chunk``.

    Extra keyword arguments are forwarded to ``new_chunk``. Chunks produced by
    a ``ShuffleProxy`` operator are rejected by an assertion.
    """
    from .core.operator import ShuffleProxy

    source_op = chunk.op
    chunk_params = chunk.params.copy()
    assert not isinstance(source_op, ShuffleProxy)
    # for non-shuffle nodes, we build Fetch chunks
    # to replace original chunk
    fetch_op_cls = source_op.get_fetch_op_cls(chunk)
    fetch_op = fetch_op_cls(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
    return fetch_op.new_chunk(
        None,
        is_broadcaster=chunk.is_broadcaster,
        kws=[chunk_params],
        _key=chunk.key,
        **kwargs,
    )
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def build_fetch_tileable(tileable: TileableType) -> TileableType:
    """
    Build a fetch tileable carrying the params, key, id and nsplits of ``tileable``.

    When the tileable is coarse no chunks are attached; otherwise a fetch chunk
    is built for each of its chunks.
    """
    if tileable.is_coarse():
        fetch_chunks = None
    else:
        fetch_chunks = [build_fetch_chunk(c, index=c.index) for c in tileable.chunks]

    source_op = tileable.op
    tileable_params = tileable.params.copy()

    fetch_op_cls = source_op.get_fetch_op_cls(tileable)
    fetch_op = fetch_op_cls(_id=source_op.id)
    new_tileables = fetch_op.new_tileables(
        None,
        chunks=fetch_chunks,
        nsplits=tileable.nsplits,
        _key=tileable.key,
        _id=tileable.id,
        **tileable_params,
    )
    return new_tileables[0]
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def build_fetch(entity: EntityType) -> EntityType:
    """
    Build the fetch counterpart of a chunk or an entity.

    Raises TypeError for objects that are neither ``CHUNK_TYPE`` nor ``ENTITY_TYPE``.
    """
    from .core import CHUNK_TYPE, ENTITY_TYPE

    # chunks are checked before entities, same precedence as an if/elif chain
    if isinstance(entity, CHUNK_TYPE):
        return build_fetch_chunk(entity)
    if isinstance(entity, ENTITY_TYPE):
        return build_fetch_tileable(entity)
    raise TypeError(f"Type {type(entity)} not supported")
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
    """
    Normalize a dtype specification.

    Extension dtypes are returned as-is, timestamp / datetime classes map to
    ``datetime64[ns]``, timedelta classes map to ``timedelta64[ns]`` and
    everything else goes through ``np.dtype``.
    """
    if pd.api.types.is_extension_array_dtype(dtype):
        return dtype
    # identity checks on purpose: np.dtype equality would coerce the class operand
    if dtype is pd.Timestamp or dtype is datetime.datetime:
        return np.dtype("datetime64[ns]")
    if dtype is pd.Timedelta or dtype is datetime.timedelta:
        return np.dtype("timedelta64[ns]")
    return np.dtype(dtype)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def serialize_serializable(serializable, compress: bool = False):
    """
    Serialize an object into a single bytes payload.

    Layout: an 8-byte little-endian header length, the msgpack-encoded header
    (with the size of every buffer stored under ``header[0]["buf_sizes"]``),
    then the buffers in order. When ``compress`` is true the payload is
    zlib-compressed.
    """
    from .serialization import serialize

    header, buffers = serialize(serializable)
    # prefer nbytes where available, fall back to len() for plain bytes-like objects
    header[0]["buf_sizes"] = [getattr(buf, "nbytes", len(buf)) for buf in buffers]
    packed_header = msgpack.dumps(header)

    out = io.BytesIO()
    out.write(struct.pack("<Q", len(packed_header)))
    out.write(packed_header)
    for buf in buffers:
        out.write(buf)
    payload = out.getvalue()

    return zlib.compress(payload) if compress else payload
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def deserialize_serializable(ser_serializable: bytes):
    """
    Rebuild an object from an uncompressed ``serialize_serializable`` payload.

    The payload starts with an 8-byte little-endian header length, followed by
    the msgpack-encoded header and the buffers whose sizes are listed under
    ``header[0]["buf_sizes"]``.

    NOTE(review): the payload is not decompressed here; data produced with
    ``compress=True`` presumably has to be zlib-decompressed by the caller.
    """
    from .serialization import deserialize

    bio = io.BytesIO(ser_serializable)
    # "<Q" matches the explicit little-endian format used when writing; plain
    # "Q" uses native byte order and would misread the length on big-endian hosts
    s_header_length = struct.unpack("<Q", bio.read(8))[0]
    header2 = msgpack.loads(bio.read(s_header_length))
    buffers2 = [bio.read(s) for s in header2[0]["buf_sizes"]]
    return deserialize(header2, buffers2)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def skip_na_call(func: Callable):
    """Wrap a one-argument function so that a None argument yields None without calling it."""

    @functools.wraps(func)
    def new_func(x):
        if x is None:
            return None
        return func(x)

    return new_func
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def url_path_join(*pieces):
    """Join components of url into a relative url

    Use to prevent double slash when joining subpath. This will leave the
    initial and final / in place
    """
    leading = "/" if pieces[0].startswith("/") else ""
    trailing = "/" if pieces[-1].endswith("/") else ""
    # drop components that are empty once their surrounding slashes are removed
    segments = [part for part in (piece.strip("/") for piece in pieces) if part]
    joined = leading + "/".join(segments) + trailing
    # only slashes were given: collapse to a single one
    return "/" if joined == "//" else joined
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def random_ports(port: int, n: int):
    """Generate a list of n random ports near the given port.

    The first 5 ports will be sequential, and the remaining n-5 will be
    randomly selected in the range [port-2*n, port+2*n].
    """
    sequential_count = min(5, n)
    yield from (port + offset for offset in range(sequential_count))

    spread = 2 * n
    for _ in range(n - 5):
        # never yield a port number below 1
        yield max(1, port + random.randint(-spread, spread))
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def build_temp_table_name(session_id: str, tileable_key: str) -> str:
    """Compose the temp table name from a session id and a tileable key."""
    return "tmp_mf_{}_{}".format(session_id, tileable_key)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:
    """Compose the intermediate table name: the temp table name plus ``_intermediate``."""
    base_name = build_temp_table_name(session_id, tileable_key)
    return base_name + "_intermediate"
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def build_session_volume_name(session_id: str) -> str:
    """Compose the volume name from a session id."""
    return "mf_vol_{}".format(session_id)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def build_tileable_dir_name(tileable_key: str) -> str:
    """Return the MD5 hex digest of ``mf_dir_<tileable_key>``."""
    raw_name = f"mf_dir_{tileable_key}".encode()
    return hashlib.md5(raw_name).hexdigest()
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def extract_messages_and_stacks(exc: Exception) -> Tuple[List[str], List[str]]:
    """
    Collect messages and formatted tracebacks along the ``__cause__`` chain.

    Parameters
    ----------
    exc : the outermost exception

    Returns
    -------
    messages, stacks : two lists of equal length, ordered from ``exc`` down to
        its innermost explicit cause
    """
    messages, stacks = [], []
    cur_exc = exc
    while True:
        messages.append(str(cur_exc))
        stacks.append("".join(traceback.format_tb(cur_exc.__traceback__)))
        # follow the chain from the exception just recorded; checking the
        # outermost exception instead would never reach the end of the chain
        if cur_exc.__cause__ is None:
            break
        cur_exc = cur_exc.__cause__
    return messages, stacks
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
async def wait_http_response(
    url: str, *, request_timeout: TimeoutType = None, **kwargs
) -> httpclient.HTTPResponse:
    """
    Fetch ``url``, retrying on client-side timeouts until a response arrives.

    Parameters
    ----------
    url : address to fetch
    request_timeout : overall time budget in seconds; None retries without an
        overall limit
    kwargs : forwarded to ``AsyncHTTPClient.fetch``

    Returns
    -------
    the HTTP response of the first attempt that does not time out

    Raises
    ------
    TimeoutError
        when the overall budget is used up before a response is received
    """
    start_time = time.time()
    while True:
        if request_timeout is None:
            # no overall budget: let the HTTP client apply its own default
            timeout_left = None
        else:
            remaining = request_timeout - (time.time() - start_time)
            if remaining <= 0:
                break
            # every attempt waits at most 10 seconds and never longer than
            # what is left of the budget (rather than the time already spent)
            timeout_left = min(10.0, remaining)
        try:
            return await httpclient.AsyncHTTPClient().fetch(
                url, request_timeout=timeout_left, **kwargs
            )
        except HTTPTimeoutError:
            # this attempt timed out; retry while there is budget left
            pass
    raise TimeoutError
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def get_handler_timeout_value(handler: web.RequestHandler) -> TimeoutType:
    """
    Read the ``wait`` / ``timeout`` request arguments and turn them into a timeout.

    Returns 0 when ``wait`` is off, None when ``wait`` is on and the timeout is
    (nearly) zero, and the parsed timeout otherwise.
    """
    wait = bool(int(handler.get_argument("wait", "0")))
    timeout = float(handler.get_argument("timeout", "0"))
    if not wait:
        return 0
    if abs(timeout) < 1e-6:
        return None
    return timeout
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def format_timeout_params(timeout: TimeoutType) -> str:
    """
    Render a timeout as a URL query string.

    None gives ``?wait=1``, a (nearly) zero timeout gives ``?wait=0`` and any
    other value gives ``?wait=1&timeout=<value>``.
    """
    if timeout is None:
        return "?wait=1"
    no_wait = abs(timeout) < 1e-6
    return "?wait=0" if no_wait else f"?wait=1&timeout={timeout}"
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
async def to_thread_pool(func, *args, pool=None, **kwargs):
    """
    Run ``func(*args, **kwargs)`` in an executor of the running loop and await its result.

    The call is executed inside a copy of the current contextvars context.
    ``pool`` is handed to ``run_in_executor`` unchanged.
    """
    running_loop = asyncio.events.get_running_loop()
    context_copy = contextvars.copy_context()
    bound_call = functools.partial(context_copy.run, func, *args, **kwargs)
    return await running_loop.run_in_executor(pool, bound_call)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
class ToThreadCancelledError(asyncio.CancelledError):
    """Cancellation error that additionally carries a result value."""

    def __init__(self, *args, result=None):
        self._result = result
        super().__init__(*args)

    @property
    def result(self):
        """Value given as ``result`` at construction time."""
        return self._result
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
_ToThreadRetType = TypeVar("_ToThreadRetType")
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
class ToThreadMixin:
    """
    Mixin that runs blocking callables on a lazily created thread pool.

    The pool is stored on the instance as ``_pool`` the first time
    ``to_thread`` is called and is shut down in ``__del__``.
    """

    # number of worker threads given to the ThreadPoolExecutor
    _thread_pool_size = 1
    # class-level counter; its values are used in thread name prefixes
    _counter = itertools.count().__next__

    def __del__(self):
        # only shut down a pool that was actually created
        if hasattr(self, "_pool"):
            kw = {"wait": False}
            # cancel_futures is only passed on Python 3.9 or later
            if sys.version_info[:2] >= (3, 9):
                kw["cancel_futures"] = True
            self._pool.shutdown(**kw)

    async def to_thread(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        timeout: float = None,
        **kwargs,
    ) -> _ToThreadRetType:
        """
        Run ``func(*args, **kwargs)`` in the instance's thread pool.

        Parameters
        ----------
        func : callable to run
        wait_on_cancel : if True, a cancellation or timeout still awaits the
            function and then raises ``ToThreadCancelledError`` carrying its
            result; if False the original error is re-raised immediately
        timeout : passed to ``asyncio.wait_for``

        Returns
        -------
        the value returned by ``func``
        """
        if not hasattr(self, "_pool"):
            self._pool = concurrent.futures.ThreadPoolExecutor(
                self._thread_pool_size,
                thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
            )

        task = asyncio.create_task(
            to_thread_pool(func, *args, **kwargs, pool=self._pool)
        )
        try:
            # the task is shielded so it can still be awaited in the handler below
            return await asyncio.wait_for(asyncio.shield(task), timeout)
        except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
            if not wait_on_cancel:
                raise
            result = await task
            raise ToThreadCancelledError(*ex.args, result=result)

    def ensure_async_call(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        **kwargs,
    ) -> Awaitable[_ToThreadRetType]:
        """
        Return an awaitable for ``func(*args, **kwargs)``.

        Coroutine functions are called directly (``wait_on_cancel`` is not
        used in that case); other callables go through ``to_thread``.
        """
        if asyncio.iscoroutinefunction(func):
            return func(*args, **kwargs)
        return self.to_thread(func, *args, wait_on_cancel=wait_on_cancel, **kwargs)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def config_odps_default_options():
    """Replace ``odps.options.sql.settings`` with the fixed set of settings below."""
    from odps import options as odps_options

    default_sql_settings = {
        "odps.longtime.instance": "false",
        "odps.sql.session.select.only": "false",
        "metaservice.client.cache.enable": "false",
        "odps.sql.session.result.cache.enable": "false",
        "odps.sql.submit.mode": "script",
    }
    odps_options.sql.settings = default_sql_settings
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def to_hashable(obj: Any) -> Hashable:
    """
    Recursively convert nested containers into tuples.

    Mappings are rebuilt with the same type and converted values, non-string
    iterables become tuples and hashable objects are returned unchanged.
    Anything else raises TypeError.
    """
    if isinstance(obj, Mapping):
        return type(obj)((key, to_hashable(val)) for key, val in obj.items())
    if isinstance(obj, Iterable) and not isinstance(obj, str):
        return tuple(to_hashable(member) for member in obj)
    if isinstance(obj, Hashable):
        return obj
    raise TypeError(type(obj))
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def estimate_pandas_size(
    pd_obj, max_samples: int = 10, min_sample_rows: int = 100
) -> int:
    """
    Estimate the size in bytes of a pandas object.

    ``sys.getsizeof`` is used directly for objects with at most
    ``min_sample_rows`` rows, for RangeIndex / MultiIndex objects and for data
    whose dtypes are all numeric numpy dtypes or ArrowDtype. Otherwise the
    size of ``max_samples`` randomly chosen rows is scaled up to the full length.
    """
    # MultiIndex's sample size can't be used to estimate
    if len(pd_obj) <= min_sample_rows or isinstance(
        pd_obj, (pd.RangeIndex, pd.MultiIndex)
    ):
        return sys.getsizeof(pd_obj)

    from .dataframe.arrays import ArrowDtype

    def _is_fast_dtype(dtype):
        if not isinstance(dtype, np.dtype):
            return isinstance(dtype, ArrowDtype)
        return np.issubdtype(dtype, np.number)

    is_series = False
    if isinstance(pd_obj, pd.DataFrame):
        dtypes = list(pd_obj.dtypes)
        index_obj = pd_obj.index
    elif isinstance(pd_obj, pd.Series):
        dtypes = [pd_obj.dtype]
        index_obj = pd_obj.index
        is_series = True
    else:
        dtypes = []
        index_obj = pd_obj

    # handling possible MultiIndex
    if hasattr(index_obj, "dtypes"):
        dtypes.extend(index_obj.dtypes)
    else:
        dtypes.append(index_obj.dtype)

    if all(_is_fast_dtype(dtype) for dtype in dtypes):
        return sys.getsizeof(pd_obj)

    indices = np.sort(np.random.choice(len(pd_obj), size=max_samples, replace=False))
    # Index objects are sliced directly, others through .iloc
    iloc = pd_obj if isinstance(pd_obj, pd.Index) else pd_obj.iloc

    if not isinstance(index_obj, pd.MultiIndex):
        sample_size = sys.getsizeof(iloc[indices])
        return sample_size * len(pd_obj) // max_samples

    # MultiIndex's sample size is much greater than expected, thus we calculate
    # the size separately.
    index_size = sys.getsizeof(pd_obj.index)
    sample_usage = iloc[indices].memory_usage(deep=True, index=False)
    sample_frame_size = sample_usage if is_series else sample_usage.sum()
    return index_size + sample_frame_size * len(pd_obj) // max_samples
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
class ModulePlaceholder:
    """Stand-in for a missing module: any attribute access or call raises AttributeError."""

    def __init__(self, mod_name: str):
        self._mod_name = mod_name

    def _raises(self):
        message = f"{self._mod_name} is required but not installed."
        raise AttributeError(message)

    def __getattr__(self, key):
        # only invoked for attributes that are not found normally
        self._raises()

    def __call__(self, *_args, **_kwargs):
        self._raises()
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def lazy_import(
    name: str,
    package: str = None,
    globals: Dict = None,  # pylint: disable=redefined-builtin
    locals: Dict = None,  # pylint: disable=redefined-builtin
    rename: str = None,
    placeholder: bool = False,
):
    """
    Create a proxy that imports a module on first attribute access.

    Parameters
    ----------
    name : module name handed to ``importlib.import_module``
    package : anchor package for relative names
    globals : namespace in which ``rename`` is rebound to the real module once
        loaded (only if that key already exists); defaults to the caller's globals
    locals : namespace updated instead when ``rename`` is not in ``globals``
    rename : key used in ``globals`` / ``locals``; defaults to ``name``
    placeholder : if True and the top-level package cannot be found, return a
        ``ModulePlaceholder`` instead of None

    Returns
    -------
    a lazy module proxy, a ``ModulePlaceholder`` or None
    """
    # local import: ``import importlib`` alone does not guarantee that the
    # ``importlib.util`` submodule is loaded
    from importlib.util import find_spec

    rename = rename or name
    prefix_name = name.split(".", 1)[0]
    globals = globals or inspect.currentframe().f_back.f_globals

    class LazyModule(object):
        def __init__(self):
            self._on_loads = []

        def __getattr__(self, item):
            # do not trigger the real import for attributes probed by pytest
            if item.startswith("_pytest") or item in ("__bases__", "__test__"):
                raise AttributeError(item)

            real_mod = importlib.import_module(name, package=package)
            if rename in globals:
                globals[rename] = real_mod
            elif locals is not None:
                locals[rename] = real_mod
            ret = getattr(real_mod, item)
            for on_load_func in self._on_loads:
                on_load_func()
            # make sure on_load hooks only executed once
            self._on_loads = []
            return ret

        def add_load_handler(self, func: Callable):
            self._on_loads.append(func)
            return func

    # find_spec replaces pkgutil.find_loader, which is deprecated since
    # Python 3.12 and scheduled for removal
    try:
        available = find_spec(prefix_name) is not None
    except (ImportError, ValueError):
        # find_spec raises ValueError for already-imported modules that carry
        # no usable __spec__; such a module is importable by definition
        available = prefix_name in sys.modules

    if available:
        return LazyModule()
    elif placeholder:
        return ModulePlaceholder(prefix_name)
    else:
        return None
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def sbytes(x: Any) -> bytes:
    """
    Convert a value to bytes via its string form where applicable.

    None and numbers are encoded as ASCII text, lists / tuples as the joined
    ``str`` of their items wrapped in brackets / parentheses, strings as UTF-8
    and anything else is handed to ``bytes()``.
    """
    # NB: bytes() in Python 3 has different semantic with Python 2, see: help(bytes)
    from numbers import Number

    if x is None or isinstance(x, Number):
        return bytes(str(x), encoding="ascii")
    if isinstance(x, (list, tuple)):
        opening, closing = ("[", "]") if isinstance(x, list) else ("(", ")")
        text = opening + ", ".join(str(k) for k in x) + closing
        return bytes(text, encoding="utf-8")
    if isinstance(x, str):
        return bytes(x, encoding="utf-8")
    return bytes(x)
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def is_full_slice(slc: Any) -> bool:
    """Check if the input is a full slice ((:) or (0:))"""
    if not isinstance(slc, slice):
        return False
    starts_at_origin = slc.start == 0 or slc.start is None
    if not starts_at_origin:
        return False
    return slc.stop is None and slc.step is None
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
# number of wrapped calls (see enter_current_session) currently in progress
_enter_counter = 0
# default session captured when the outermost wrapped call was entered
_initial_session = None


def enter_current_session(func: Callable):
    """
    Decorate ``func(cls, ctx, op)`` to run with the session of ``ctx`` as default.

    On the outermost call the current default session is remembered and the
    session returned by ``ctx.get_current_session()`` is made default. When
    the outermost call finishes, the remembered session is made default
    again, or the default is reset if none was remembered. Contexts without
    ``get_current_session`` are passed straight to ``func``.
    """

    @functools.wraps(func)
    def wrapped(cls, ctx, op):
        from .session import AbstractSession, get_default_session

        global _enter_counter, _initial_session
        # skip in some test cases
        if not hasattr(ctx, "get_current_session"):
            return func(cls, ctx, op)

        # counter and session switch are updated under AbstractSession._lock
        with AbstractSession._lock:
            if _enter_counter == 0:
                # to handle nested call, only set initial session
                # in first call
                session = ctx.get_current_session()
                _initial_session = get_default_session()
                session.as_default()
            _enter_counter += 1

        try:
            result = func(cls, ctx, op)
        finally:
            # always undo the bookkeeping, even when func raises
            with AbstractSession._lock:
                _enter_counter -= 1
                if _enter_counter == 0:
                    # set previous session when counter is 0
                    if _initial_session:
                        _initial_session.as_default()
                    else:
                        AbstractSession.reset_default()
        return result

    return wrapped
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
_func_token_cache = weakref.WeakKeyDictionary()
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _get_func_token_values(func):
|
|
718
|
+
if hasattr(func, "__code__"):
|
|
719
|
+
tokens = [func.__code__.co_code]
|
|
720
|
+
if func.__closure__ is not None:
|
|
721
|
+
cvars = tuple([x.cell_contents for x in func.__closure__])
|
|
722
|
+
tokens.append(cvars)
|
|
723
|
+
return tokens
|
|
724
|
+
else:
|
|
725
|
+
tokens = []
|
|
726
|
+
while isinstance(func, functools.partial):
|
|
727
|
+
tokens.extend([func.args, func.keywords])
|
|
728
|
+
func = func.func
|
|
729
|
+
if hasattr(func, "__code__"):
|
|
730
|
+
tokens.extend(_get_func_token_values(func))
|
|
731
|
+
elif isinstance(func, types.BuiltinFunctionType):
|
|
732
|
+
tokens.extend([func.__module__, func.__qualname__])
|
|
733
|
+
else:
|
|
734
|
+
tokens.append(func)
|
|
735
|
+
return tokens
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def get_func_token(func):
    """
    Return the token of ``func``, cached per function object where possible.

    Callables that cannot be weakly referenced are tokenized on every call.
    """
    try:
        cached = _func_token_cache.get(func)
        if cached is not None:
            return cached
        token = tokenize(*_get_func_token_values(func))
        _func_token_cache[func] = token
        return token
    except TypeError:  # cannot create weak reference to func like 'numpy.ufunc'
        return tokenize(*_get_func_token_values(func))
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
_io_quiet_local = threading.local()
|
|
751
|
+
_io_quiet_lock = threading.Lock()
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
class _QuietIOWrapper:
|
|
755
|
+
def __init__(self, wrapped):
|
|
756
|
+
self.wrapped = wrapped
|
|
757
|
+
|
|
758
|
+
def __getattr__(self, item):
|
|
759
|
+
return getattr(self.wrapped, item)
|
|
760
|
+
|
|
761
|
+
def write(self, d):
|
|
762
|
+
if getattr(_io_quiet_local, "is_wrapped", False):
|
|
763
|
+
return 0
|
|
764
|
+
return self.wrapped.write(d)
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
@contextmanager
def quiet_stdio():
    """Quiets standard outputs when inferring types of functions"""
    # mark the current thread as quiet and wrap both streams under the lock
    with _io_quiet_lock:
        _io_quiet_local.is_wrapped = True
        sys.stdout = _QuietIOWrapper(sys.stdout)
        sys.stderr = _QuietIOWrapper(sys.stderr)

    try:
        yield
    finally:
        with _io_quiet_lock:
            # remove one layer of wrapping from each stream
            sys.stdout = sys.stdout.wrapped
            sys.stderr = sys.stderr.wrapped
            # the flag is cleared only once stdout is no longer wrapped, so it
            # stays set while an enclosing quiet_stdio() is still active
            if not isinstance(sys.stdout, _QuietIOWrapper):
                _io_quiet_local.is_wrapped = False
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
# from https://github.com/ericvsmith/dataclasses/blob/master/dataclass_tools.py
|
|
786
|
+
# released under Apache License 2.0
|
|
787
|
+
def dataslots(cls):
    """
    Return a copy of a dataclass that declares ``__slots__`` for its fields.

    A new class is built because ``__slots__`` cannot be added to an existing
    class. Raises TypeError if ``cls`` already defines ``__slots__``.
    """
    # Make sure __slots__ isn't already set.
    if "__slots__" in cls.__dict__:  # pragma: no cover
        raise TypeError(f"{cls.__name__} already specifies __slots__")

    slot_names = tuple(field.name for field in dataclasses.fields(cls))

    # Copy the class namespace, then replace the per-field class attributes
    # (they would clash with the slot descriptors) and __dict__ itself.
    namespace = dict(cls.__dict__)
    namespace["__slots__"] = slot_names
    for slot_name in slot_names:
        namespace.pop(slot_name, None)
    namespace.pop("__dict__", None)

    original_qualname = getattr(cls, "__qualname__", None)
    # build the new class with the same metaclass, name and bases
    slotted_cls = type(cls)(cls.__name__, cls.__bases__, namespace)
    if original_qualname is not None:
        slotted_cls.__qualname__ = original_qualname
    return slotted_cls
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def adapt_docstring(doc: str) -> str:
    """
    Adapt numpy-style docstrings to MaxFrame docstring.

    This util function will add MaxFrame imports, replace object references
    and add execute calls. Note that check is needed after replacement.
    """
    if doc is None:
        return None

    uses_numpy = "np." in doc
    uses_pandas = "pd." in doc

    adapted = []
    imports_pending = True
    after_prompt = False
    for raw_line in doc.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith((">>>", "...")):
            after_prompt = True
            if imports_pending:
                # import lines go before the first prompt, with its indentation
                imports_pending = False
                indent = raw_line[: len(raw_line) - len(raw_line.lstrip(" \t"))]
                if uses_numpy:
                    adapted.append(indent + ">>> import maxframe.tensor as mt")
                if uses_pandas:
                    adapted.append(indent + ">>> import maxframe.dataframe as md")
            raw_line = raw_line.replace("np.", "mt.").replace("pd.", "md.")
        elif after_prompt:
            after_prompt = False
            if stripped:
                # a prompt directly followed by output gets an execute call
                adapted[-1] += ".execute()"
        adapted.append(raw_line)
    return "\n".join(adapted)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def stringify_path(path: Union[str, os.PathLike]) -> str:
    """
    Convert *path* to a string or unicode path if possible.
    """
    if not isinstance(path, str):
        # checking whether path implements the filesystem protocol
        try:
            path = path.__fspath__()
        except AttributeError:
            raise TypeError("not a path-like object")
    return path
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
# power of 1024 selected by the first letter of a size suffix
_memory_size_indices = {"": 0, "k": 1, "m": 2, "g": 3, "t": 4}


def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
    """
    Parse a size such as ``"2 MB"``, ``"50%"`` or a plain number.

    Returns
    -------
    (size, is_percent) : for percentages the fraction and True, otherwise the
        number of bytes and False

    Raises
    ------
    ValueError
        when the numeric part or the suffix cannot be interpreted
    """
    if isinstance(value, numbers.Number):
        return float(value), False

    text = value.strip().lower()
    # position of the first character that cannot belong to the number
    split_at = next(
        (pos for pos, ch in enumerate(text) if ch not in "0123456789.-"), len(text)
    )
    value = text[:split_at]
    suffix = text[split_at:].strip()

    if suffix.endswith("%"):
        return float(value) / 100, True

    try:
        # only the first letter of the suffix selects the unit
        return float(value) * (1024 ** _memory_size_indices[suffix[:1]]), False
    except (ValueError, KeyError):
        raise ValueError(f"Unknown limitation value: {value}")
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
def remove_suffix(value: str, suffix: str) -> str:
    """
    Return ``value`` without a trailing ``suffix``.

    ``value`` is returned unchanged when it does not end with ``suffix`` or
    when ``suffix`` is empty.
    """
    # an empty suffix must be special-cased: value[:-0] is value[:0], i.e. ""
    if suffix and value.endswith(suffix):
        return value[: -len(suffix)]
    return value
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
    """
    Collect all objects of the given types from nested lists, tuples, sets and dicts.

    Containers are walked depth-first in their iteration order; for dicts only
    the values are visited. Matching objects are not searched any further.
    """
    matches = []
    pending = [nested]

    while pending:
        current = pending.pop()
        if isinstance(current, types):
            matches.append(current)
        elif isinstance(current, (list, tuple, set)):
            # pushed in reverse so that items are popped in their original order
            pending.extend(reversed(list(current)))
        elif isinstance(current, dict):
            pending.extend(reversed(list(current.values())))

    return matches
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def replace_objects(nested: Union[List, Dict], mapping: Mapping) -> Union[List, Dict]:
    """
    Build a copy of a nested container with leaf objects substituted.

    Every leaf found as a key of *mapping* is replaced by the mapped value.
    Nested dicts, lists, tuples and sets are rebuilt recursively with their
    own type; for dicts only the values are substituted, keys are kept.
    Leaves that cannot be looked up (unhashable) are kept unchanged. When
    *mapping* is empty, *nested* itself is returned.
    """
    if not mapping:
        return nested

    def _substitute(item):
        if isinstance(item, (dict, list, tuple, set)):
            return replace_objects(item, mapping)
        try:
            return mapping.get(item, item)
        except TypeError:
            # unhashable leaf, cannot be a key of the mapping
            return item

    is_dict = isinstance(nested, dict)
    originals = list(nested.values()) if is_dict else list(nested)
    replaced = [_substitute(item) for item in originals]

    container_type = type(nested)
    if is_dict:
        return container_type(zip(nested.keys(), replaced))
    return container_type(replaced)
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
def trait_from_env(
    trait_name: str, env: str, trait: Optional[traitlets.TraitType] = None
):
    """
    Create a ``traitlets.default`` handler which reads the default value
    of a trait from an environment variable.

    Parameters
    ----------
    trait_name: str
        name of the trait the default handler is registered for
    env: str
        name of the environment variable to read; when it is not set,
        the ``default_value`` of the trait is used instead
    trait: traitlets.TraitType, optional
        the trait object. When omitted, it is looked up under
        ``trait_name`` in the local variables of the calling frame
        (presumably the class body declaring the trait -- confirm
        against callers).

    Returns
    -------
    The result of ``traitlets.default(trait_name)`` applied to the
    generated function, which is named ``<trait_name>_default``.
    """
    if trait is None:
        # must stay in this function: index 1 of the stack is our caller
        caller_locals = inspect.stack()[1].frame.f_locals
        trait = caller_locals[trait_name]

    default_value = trait.default_value
    sub_trait: traitlets.TraitType = getattr(trait, "_trait", None)

    def default_value_simple(self):
        raw = os.getenv(env, default_value)
        # only textual values need to be parsed by the trait
        return trait.from_string(raw) if isinstance(raw, (str, bytes)) else raw

    def default_value_list(self):
        raw = os.getenv(env, default_value)
        if raw is None or isinstance(raw, traitlets.Sentinel):
            return raw

        # comma separated items; an empty value gives an empty list
        pieces = raw.split(",") if raw else []
        if not sub_trait:
            return pieces
        return [sub_trait.from_string(piece) for piece in pieces]

    default_value_fun = (
        default_value_list
        if isinstance(trait, traitlets.List)
        else default_value_simple
    )
    default_value_fun.__name__ = trait_name + "_default"
    return traitlets.default(trait_name)(default_value_fun)
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def relay_future(
    dest: Union[asyncio.Future, concurrent.futures.Future],
    src: Union[asyncio.Future, concurrent.futures.Future],
) -> None:
    """
    Forward the outcome of *src* to *dest* once *src* is done.

    A done-callback is registered on *src*. It copies the result of
    *src* into *dest*; any exception raised while doing so (including
    the exception stored in *src*) is set on *dest* instead.
    """

    def _forward(done: Union[asyncio.Future, concurrent.futures.Future]):
        try:
            outcome = done.result()
            dest.set_result(outcome)
        except BaseException as exc:
            dest.set_exception(exc)

    src.add_done_callback(_forward)
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
# Maps type names used in arrow type strings to the callables building the
# matching DataType. Stays empty when ``pa`` (presumably pyarrow, imported
# elsewhere in this file) is not available.
_arrow_type_constructors = {}
if pa:
    # names whose constructor is spelled differently from the type name,
    # or which need their parsed parameters adapted
    _arrow_type_constructors.update(
        {
            "bool": pa.bool_,
            "list": lambda x: pa.list_(dict(x)["item"]),
            "map": lambda x: pa.map_(*x),
            "struct": pa.struct,
            "fixed_size_binary": pa.binary,
            "halffloat": pa.float16,
            "float": pa.float32,
            "double": pa.float64,
            "decimal": pa.decimal128,
        }
    )
    # names whose constructor is the attribute of ``pa`` with the same name
    _plain_arrow_types = """
    null
    int8 int16 int32 int64
    uint8 uint16 uint32 uint64
    float16 float32 float64
    date32 date64
    decimal128 decimal256
    string utf8 binary
    time32 time64 duration timestamp
    month_day_nano_interval
    """
    for _type_name in _plain_arrow_types.split():
        # skip names not provided by the installed version of ``pa``
        if hasattr(pa, _type_name):
            _arrow_type_constructors[_type_name] = getattr(pa, _type_name)
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
def arrow_type_from_str(type_str: str) -> pa.DataType:
    """
    Convert arrow type representations (for inst., list<item: int64>)
    into arrow DataType instances

    The string is tokenized with the Python tokenizer and evaluated with
    two stacks: a value stack holding names, numbers, parameter lists and
    DataType objects already built, and an operator stack holding
    the brackets and colons still open.

    Raises
    ------
    ValueError
        if more than one value is left once the whole string is consumed
    """
    # enable consecutive brackets to be tokenized
    type_str = type_str.replace("<", "< ").replace(">", " >")
    tokens = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
    value_stack, op_stack = [], []

    def _pop_make_type(with_args: bool = False, combined: bool = True) -> None:
        """
        Pops tops of value stacks, creates a DataType instance and push back

        Parameters
        ----------
        with_args: bool
            if True, will contain next item (parameter list) in
            the value stack as parameters
        combined: bool
            if True, the parameter list is passed to the constructor as
            one single argument; otherwise it is expanded into
            positional arguments
        """
        args = (value_stack.pop(),) if with_args else ()
        if not combined:
            args = args[0]
        candidate = value_stack.pop()
        # values which are already DataType objects or unknown names
        # are pushed back untouched
        if (
            not isinstance(candidate, pa.DataType)
            and candidate in _arrow_type_constructors
        ):
            candidate = _arrow_type_constructors[candidate](*args)
        value_stack.append(candidate)

    def _fold_named_item() -> None:
        # parameterized sub-types need to be represented as tuples
        if op_stack[-1] == ":":
            op_stack.pop()
            pair = tuple(value_stack[-2:])
            del value_stack[-2:]
            value_stack.append(pair)

    def _move_to_param_list() -> None:
        # put generated item into the parameter list
        item = value_stack.pop()
        value_stack[-1].append(item)

    for token in tokens:
        kind, text = token.type, token.string
        if kind == pytokenize.OP:
            if text == ":":
                op_stack.append(text)
            elif text == ",":
                # gather previous sub-types
                if op_stack[-1] in ("<", ":"):
                    _pop_make_type()
                _fold_named_item()
                _move_to_param_list()
            elif text in ("<", "[", "("):
                # pushes an empty parameter list for future use
                value_stack.append([])
                op_stack.append(text)
            elif text in (")", "]"):
                _move_to_param_list()
                # make DataType (i.e., fixed_size_binary / decimal) given args
                _pop_make_type(with_args=True, combined=False)
                op_stack.pop()
            elif text == ">":
                _pop_make_type()
                _fold_named_item()
                _move_to_param_list()
                # make DataType (i.e., list / map / struct) given args
                _pop_make_type(True)
                op_stack.pop()
        elif kind == pytokenize.NAME:
            value_stack.append(text)
        elif kind == pytokenize.NUMBER:
            value_stack.append(int(text))
        elif kind == pytokenize.ENDMARKER:
            # make final type
            _pop_make_type()

    if len(value_stack) > 1:
        raise ValueError(f"Cannot parse type {type_str}")
    return value_stack[-1]
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
def get_python_tag():
    """
    Return the tag of the running interpreter version: ``cp`` followed by
    the major and the minor version numbers, for instance ``cp310``.
    """
    # todo add implementation suffix for non-GIL tags when PEP703 is ready
    version_info = sys.version_info[:2]
    return f"cp{version_info[0]}{version_info[1]}"
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def get_item_if_scalar(val: Any) -> Any:
    """
    Unwrap a numpy array with empty shape into the Python scalar it
    holds; any other value is returned unchanged.
    """
    is_zero_dim_array = isinstance(val, np.ndarray) and val.shape == ()
    return val.item() if is_zero_dim_array else val
|