maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
maxframe/codegen.py
ADDED
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import abc
|
|
16
|
+
import base64
|
|
17
|
+
import dataclasses
|
|
18
|
+
import logging
|
|
19
|
+
from collections import defaultdict
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
|
|
22
|
+
|
|
23
|
+
from odps.types import OdpsSchema
|
|
24
|
+
from odps.utils import camel_to_underline
|
|
25
|
+
|
|
26
|
+
from .core import OperatorType, Tileable, TileableGraph
|
|
27
|
+
from .core.operator import Fetch
|
|
28
|
+
from .extension import iter_extensions
|
|
29
|
+
from .lib import wrapped_pickle as pickle
|
|
30
|
+
from .odpsio import build_dataframe_table_meta
|
|
31
|
+
from .odpsio.schema import pandas_to_odps_schema
|
|
32
|
+
from .protocol import DataFrameTableMeta, ResultInfo
|
|
33
|
+
from .serialization import PickleContainer
|
|
34
|
+
from .serialization.serializables import Serializable, StringField
|
|
35
|
+
from .typing_ import PandasObjectTypes
|
|
36
|
+
from .udf import MarkedFunction, PythonPackOptions
|
|
37
|
+
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from odpsctx import ODPSSessionContext
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclasses.dataclass
class CodeGenResult:
    """Values produced by one code generation run (see ``BigDagCodeGenerator.generate``)."""

    # Generated source text; ``generate`` joins the code lines with "\n".
    code: str
    # Keys of the outputs of Fetch operators mapped to their variable names.
    input_key_to_variables: Dict[str, str]
    # Keys of the DAG's result tileables mapped to their variable names.
    output_key_to_variables: Dict[str, str]
    # Result infos recorded in the code context, keyed by tileable key.
    output_key_to_result_infos: Dict[str, ResultInfo]
    # Constants registered in the code context, keyed by variable name.
    constants: Dict[str, Any]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AbstractUDF(Serializable):
    """
    Base class of serializable UDF descriptions handled by the code generator.

    Subclasses provide registration and pythonpack handling; every abstract
    method here raises ``NotImplementedError``.
    """

    # Serialized under the field name "session_id".
    _session_id: str = StringField("session_id")

    def __init__(self, session_id: Optional[str] = None, **kw):
        # The public ``session_id`` argument is stored in the private field.
        super().__init__(_session_id=session_id, **kw)

    @property
    def name(self) -> str:
        """
        Name of the UDF, derived from the class name via ``camel_to_underline``.
        """
        return camel_to_underline(type(self).__name__)

    @property
    def session_id(self) -> Optional[str]:
        """
        Session id attached to the UDF, or None when the attribute is missing.
        """
        return getattr(self, "_session_id", None)

    @session_id.setter
    def session_id(self, value: str):
        self._session_id = value

    @abc.abstractmethod
    def register(self, odps: "ODPSSessionContext", overwrite: bool = False):
        """
        Register the UDF with the given ODPS session context.

        NOTE(review): exact semantics of ``overwrite`` are defined by the
        subclasses; ``BigDagCodeGenerator.register_udfs`` always passes True.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def unregister(self, odps: "ODPSSessionContext"):
        """
        Undo ``register`` for the given ODPS session context
        (presumably; confirm against the implementations).
        """
        raise NotImplementedError

    @abc.abstractmethod
    def collect_pythonpack(self) -> List[PythonPackOptions]:
        """
        Return the pythonpack options this UDF requires.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def load_pythonpack_resources(self, odps_ctx: "ODPSSessionContext") -> None:
        """
        Load pythonpack resources through the given ODPS session context.

        NOTE(review): what "loading" means is not visible in this module.
        """
        raise NotImplementedError
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class UserCodeMixin:
    """Helpers rendering Python values and pickled objects as source code."""

    @classmethod
    def obj_to_python_expr(cls, obj: Any = None) -> str:
        """
        Render an object as a Python expression string.

        Parameters
        ----------
        obj
            The object to convert to python expr. Supported types are None,
            bool, int, float, bytes, str, list, dict, tuple, set and
            PickleContainer; containers are rendered recursively.

        Returns
        -------
        str :
            Source text that evaluates to a value equal to the object when
            used in python code directly. Bytes are rendered as a
            ``base64.b64decode(...)`` call, so the generated code needs
            ``base64`` in scope.

        Raises
        ------
        ValueError
            If the type of the object is not supported.
        """
        if obj is None:
            return "None"

        # bool is a subclass of int, thus it must be tested before int to
        # keep this branch reachable.
        if isinstance(obj, bool):
            return "True" if obj else "False"

        if isinstance(obj, int):
            return repr(obj)

        if isinstance(obj, float):
            # repr() of non-finite floats ("nan", "inf", "-inf") is not a
            # valid expression, so spell them via the float constructor.
            if obj != obj:  # only NaN is unequal to itself
                return "float('nan')"
            if obj in (float("inf"), float("-inf")):
                return "float('inf')" if obj > 0 else "float('-inf')"
            return repr(obj)

        if isinstance(obj, bytes):
            base64_bytes = base64.b64encode(obj)
            # explicit !r yields the b'...' literal without relying on
            # implicit str(bytes), which warns under ``python -b``
            return f"base64.b64decode({base64_bytes!r})"

        if isinstance(obj, str):
            return repr(obj)

        if isinstance(obj, list):
            elements = ", ".join(cls.obj_to_python_expr(element) for element in obj)
            return f"[{elements}]"

        if isinstance(obj, dict):
            # keys are rendered with repr(), values recursively
            items = ", ".join(
                f"{repr(key)}: {cls.obj_to_python_expr(value)}"
                for key, value in obj.items()
            )
            return f"{{{items}}}"

        if isinstance(obj, tuple):
            elements = ", ".join(cls.obj_to_python_expr(sub_obj) for sub_obj in obj)
            # a one-element tuple needs a trailing comma
            trailing_comma = "," if len(obj) == 1 else ""
            return f"({elements}{trailing_comma})"

        if isinstance(obj, set):
            if not obj:
                # "{}" would be an empty dict
                return "set()"
            elements = ", ".join(cls.obj_to_python_expr(sub_obj) for sub_obj in obj)
            return f"{{{elements}}}"

        if isinstance(obj, PickleContainer):
            return UserCodeMixin.generate_pickled_codes(obj, None)

        raise ValueError(f"not support arg type {type(obj)}")

    @classmethod
    def generate_pickled_codes(
        cls,
        code_to_pickle: Any,
        unpicked_data_var_name: Union[str, None] = "pickled_data",
    ) -> str:
        """
        Generate code that unpickles the given object via ``cloudpickle.loads``.

        Parameters
        ----------
        code_to_pickle: Any
            The object to be pickled.
        unpicked_data_var_name: str or None
            The variable in the generated code that holds the object loaded
            by cloudpickle. When None or empty, no assignment is generated.

        Returns
        -------
        str :
            An assignment statement ``<var> = cloudpickle.loads(...)`` (the
            variable is called 'pickled_data' by default), or the bare
            ``cloudpickle.loads(...)`` expression when no variable name is
            given.
        """
        pickled, buffers = cls.dump_pickled_data(code_to_pickle)
        pickle_loads_expr = (
            f"cloudpickle.loads({cls.obj_to_python_expr(pickled)}, "
            f"buffers={cls.obj_to_python_expr(buffers)})"
        )
        if unpicked_data_var_name:
            return f"{unpicked_data_var_name} = {pickle_loads_expr}"

        return pickle_loads_expr

    @staticmethod
    def dump_pickled_data(
        code_to_pickle: Any,
    ) -> Tuple[bytes, List[bytes]]:
        """
        Split an object into its pickled payload and out-of-band buffers.

        Parameters
        ----------
        code_to_pickle: Any
            The object to pickle. A MarkedFunction is replaced by its wrapped
            ``func`` first. A PickleContainer is not pickled again: its first
            buffer is used as payload and the remaining ones as buffers.

        Returns
        -------
        Tuple[bytes, List[bytes]] :
            The pickled payload and the list of extra buffers, which is empty
            unless a PickleContainer is passed.
        """
        if isinstance(code_to_pickle, MarkedFunction):
            code_to_pickle = code_to_pickle.func
        if isinstance(code_to_pickle, PickleContainer):
            buffers = code_to_pickle.get_buffers()
            pickled = buffers[0]
            buffers = buffers[1:]
        else:
            pickled = pickle.dumps(code_to_pickle, protocol=pickle.DEFAULT_PROTOCOL)
            buffers = []
        return pickled, buffers
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class BigDagCodeContext(metaclass=abc.ABCMeta):
    """
    Mutable state shared by operator adapters while code is generated:
    variable names, constants, schema caches, UDFs and result infos.
    """

    def __init__(self, session_id: str = None, subdag_id: str = None):
        self._session_id = session_id
        self._subdag_id = subdag_id
        # name allocation state
        self._next_var_id = 0
        self._next_const_id = 0
        self._tileable_key_to_variables = {}
        # public: handed over to CodeGenResult by the generator
        self.constants = {}
        # caches keyed by the key of the pandas data object
        self._data_table_meta_cache = {}
        self._odps_schema_cache = {}
        self._udfs = {}
        self._tileable_key_to_result_infos = {}

    @property
    def session_id(self) -> str:
        return self._session_id

    def register_udf(self, udf: AbstractUDF):
        """
        Bind the UDF to the session of this context and store it by name.
        A UDF registered later under the same name replaces the earlier one.
        """
        udf.session_id = self._session_id
        self._udfs[udf.name] = udf

    def get_udfs(self) -> List[AbstractUDF]:
        """Return the registered UDFs as a new list."""
        return [*self._udfs.values()]

    def get_tileable_variable(self, tileable: Tileable) -> str:
        """
        Return the variable name assigned to the tileable, allocating a new
        one on the first request.
        """
        variables = self._tileable_key_to_variables
        key = tileable.key
        if key not in variables:
            variables[key] = self.next_var_name()
        return variables[key]

    def next_var_name(self) -> str:
        """Allocate the next unused name of the form ``var_<n>``."""
        allocated_id = self._next_var_id
        self._next_var_id = allocated_id + 1
        return f"var_{allocated_id}"

    def get_odps_schema(
        self, data: PandasObjectTypes, unknown_as_string: bool = False
    ) -> OdpsSchema:
        """
        Get the ODPS schema matching the input data object, with caching.

        Parameters
        ----------
        data :
            The pandas data object.
        unknown_as_string :
            Whether mapping the unknown data type to a temp string value.
            Only used when the schema is not cached yet.

        Returns
        -------
        OdpsSchema :
            The OdpsSchema of the data object.
        """
        key = data.key
        schema_cache = self._odps_schema_cache
        if key in schema_cache:
            return schema_cache[key]

        # the conversion yields the table meta as well, cache both
        odps_schema, table_meta = pandas_to_odps_schema(data, unknown_as_string)
        self._data_table_meta_cache[key] = table_meta
        schema_cache[key] = odps_schema
        return odps_schema

    def get_pandas_data_table_meta(self, data: PandasObjectTypes) -> DataFrameTableMeta:
        """
        Return the table meta of the data object, building and caching it
        when it is not cached yet.
        """
        key = data.key
        meta_cache = self._data_table_meta_cache
        if key in meta_cache:
            return meta_cache[key]

        table_meta = build_dataframe_table_meta(data)
        meta_cache[key] = table_meta
        return table_meta

    def register_operator_constants(self, const_val, var_name: str = None) -> str:
        """
        Register a constant and return the code referring to it.

        Without an explicit ``var_name``, None and values of type int, str,
        bytes, bool or float are not registered; their ``repr`` is returned
        instead. Any other value is stored under a generated ``const_<n>``
        name. With an explicit ``var_name`` the value is always stored under
        that name.
        """
        if var_name is None:
            inlinable = const_val is None or isinstance(
                const_val, (int, str, bytes, bool, float)
            )
            if inlinable:
                return repr(const_val)
            const_id = self._next_const_id
            self._next_const_id = const_id + 1
            var_name = f"const_{const_id}"

        self.constants[var_name] = const_val
        return var_name

    def put_tileable_result_info(
        self, tileable: Tileable, result_info: ResultInfo
    ) -> None:
        """Record the result info under the key of the tileable."""
        self._tileable_key_to_result_infos[tileable.key] = result_info

    def get_tileable_result_infos(self) -> Dict[str, ResultInfo]:
        """Return the internal key-to-result-info mapping (not a copy)."""
        return self._tileable_key_to_result_infos
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class EngineAcceptance(Enum):
    """
    DENY: The operator is not accepted by the current engine.
    ACCEPT: The operator is accepted by the current engine, and doesn't break from here.
    BREAK: The operator is accepted by the current engine, but should break from here.
    """

    DENY = 0
    ACCEPT = 1
    BREAK = 2

    @classmethod
    def _missing_(cls, pred: bool) -> "EngineAcceptance":
        """
        Map a value that is not a member value onto ACCEPT or DENY, which
        allows ``EngineAcceptance(predicate)`` as a shortcut.

        Parameters
        ----------
        pred : bool
            The predicate variable.

        Returns
        -------
        EngineAcceptance :
            ACCEPT when the predicate is truthy, DENY otherwise.
        """
        if pred:
            return cls.ACCEPT
        return cls.DENY
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class BigDagOperatorAdapter(metaclass=abc.ABCMeta):
    """Base class of adapters translating one operator into code lines."""

    # todo handle refcount issue when generated code is being executed
    def accepts(self, op: OperatorType) -> EngineAcceptance:
        """Tell whether the operator is accepted; ACCEPT unless overridden."""
        return EngineAcceptance.ACCEPT

    @abc.abstractmethod
    def generate_code(self, op: OperatorType, context: BigDagCodeContext) -> List[str]:
        """Return the code lines of the operator; must be overridden."""
        raise NotImplementedError

    def generate_comment(
        self, op: OperatorType, context: BigDagCodeContext
    ) -> List[str]:
        """
        Produce comment lines that are placed ahead of the operator's code.

        Parameters
        ----------
        op : OperatorType
            The operator instance.
        context : BigDagCodeContext
            The BigDagCodeContext instance.

        Returns
        -------
        result: List[str]
            The comment codes, one per line. Empty unless overridden.
        """
        return []
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# Registry of code generator classes, keyed by engine type.
_engine_to_codegen: Dict[str, Type["BigDagCodeGenerator"]] = {}


def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
    """
    Store a code generator class in the registry under its ``engine_type``.

    The class itself is returned, so the function works as a class
    decorator. An existing entry of the same engine type is replaced.
    """
    engine_name = type_.engine_type
    _engine_to_codegen[engine_name] = type_
    return type_
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# Names of the built-in engines.
# NOTE(review): presumably these match the ``engine_type`` of the built-in
# code generator classes -- confirm against their definitions.
BUILTIN_ENGINE_SPE = "SPE"
BUILTIN_ENGINE_MCSQL = "MCSQL"
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class BigDagCodeGenerator(metaclass=abc.ABCMeta):
    """
    Base class of engine specific code generators.

    A generator walks a tileable graph in topological order, asks the
    operator adapters for code lines and tracks when variables are no longer
    needed. Subclasses supply the adapters and the code context.
    """

    _context: BigDagCodeContext

    # identifier of the engine, used as key of the codegen registry
    engine_type: Optional[str] = None
    # generators with higher priority are listed first by get_engine_types
    engine_priority: int = 0
    _extension_loaded = False

    def __init__(self, session_id: str, subdag_id: str = None):
        self._session_id = session_id
        self._subdag_id = subdag_id
        self._context = self._init_context(session_id, subdag_id)

    @classmethod
    def _load_engine_extensions(cls):
        """Register the code generators of all extensions, only once."""
        if cls._extension_loaded:
            return
        for name, ep in iter_extensions():
            _engine_to_codegen[name.upper()] = ep.get_codegen()
        # Store the flag on the base class: setting it on ``cls`` would only
        # mark the subclass the call came through, and the extensions would
        # be loaded again for the base class and every other subclass.
        BigDagCodeGenerator._extension_loaded = True

    @classmethod
    def get_engine_types(cls) -> List[str]:
        """
        Return the engine types of all registered code generators, ordered
        by descending ``engine_priority``.
        """
        cls._load_engine_extensions()
        engines = sorted(
            _engine_to_codegen.values(), key=lambda x: x.engine_priority, reverse=True
        )
        return [e.engine_type for e in engines]

    @classmethod
    def get_by_engine_type(cls, engine_type: str) -> Type["BigDagCodeGenerator"]:
        """
        Return the code generator class registered for the engine type.

        Raises
        ------
        KeyError
            If no generator is registered under ``engine_type``.
        """
        cls._load_engine_extensions()
        return _engine_to_codegen[engine_type]

    @abc.abstractmethod
    def get_op_adapter(
        self, op_type: Type[OperatorType]
    ) -> Type[BigDagOperatorAdapter]:
        """Return the adapter class for the operator type; must be overridden."""
        raise NotImplementedError

    @abc.abstractmethod
    def _init_context(self, session_id: str, subdag_id: str) -> BigDagCodeContext:
        """Create the code context used by this generator; must be overridden."""
        raise NotImplementedError

    def _generate_comments(
        self, op: OperatorType, adapter: BigDagOperatorAdapter
    ) -> List[str]:
        """Return the comment lines the adapter produces for the operator."""
        return adapter.generate_comment(op, self._context)

    def _generate_pre_op_code(self, op: OperatorType) -> List[str]:
        """Hook for lines emitted ahead of each operator; empty by default."""
        return []

    def _generate_delete_code(self, var_name: str) -> List[str]:
        """
        Hook for lines releasing a variable that is no longer needed; empty
        by default.
        """
        return []

    def generate_code(self, dag: TileableGraph) -> List[str]:
        """
        Generate the code of the input dag.

        Each operator is handled once, Fetch operators are skipped. After
        the code of an operator, delete code is emitted for non-result
        outputs without successors and for inputs whose remaining reference
        count drops to zero.

        Parameters
        ----------
        dag : TileableGraph
            The input DAG instance.

        Returns
        -------
        List[str] :
            The code lines.
        """
        code_lines = []
        visited_op_key = set()
        result_key_set = set(t.key for t in dag.result_tileables)
        # remaining consumer counts of outputs, keyed by tileable key
        out_refcounts = dict()
        for tileable in dag.topological_iter():
            op: OperatorType = tileable.op
            if op.key in visited_op_key or isinstance(op, Fetch):
                continue

            visited_op_key.add(op.key)

            adapter = self.get_op_adapter(type(op))()
            code_lines.extend(self._generate_pre_op_code(op))
            code_lines.extend(self._generate_comments(op, adapter))
            code_lines.extend(adapter.generate_code(op, self._context))
            code_lines.append("")  # Append an empty line to separate operators

            # record refcounts
            for out_t in op.outputs:
                if out_t.key in result_key_set:
                    # results must stay alive, never delete them
                    continue
                successor_count = dag.count_successors(out_t)
                if successor_count == 0:
                    # nobody consumes the output, release it right away
                    delete_code = self._generate_delete_code(
                        self._context.get_tileable_variable(out_t)
                    )
                    code_lines.extend(delete_code)
                else:
                    out_refcounts[out_t.key] = successor_count

            # check if refs of inputs are no longer needed
            # NOTE(review): an input listed several times in op.inputs is
            # decremented once per occurrence -- confirm this matches the
            # way count_successors counts consumers.
            for inp_t in op.inputs:
                if inp_t.key not in out_refcounts:
                    continue
                out_refcounts[inp_t.key] -= 1
                if out_refcounts[inp_t.key] == 0:
                    delete_code = self._generate_delete_code(
                        self._context.get_tileable_variable(inp_t)
                    )
                    code_lines.extend(delete_code)
                    out_refcounts.pop(inp_t.key)

        return code_lines

    def generate(self, dag: TileableGraph) -> CodeGenResult:
        """
        Generate code for the dag and collect everything needed to run it.

        Parameters
        ----------
        dag : TileableGraph
            The input DAG instance.

        Returns
        -------
        CodeGenResult :
            The joined code, the variables of Fetch outputs and of the dag
            results, the registered constants and the recorded result infos.
        """
        code_lines = self.generate_code(dag)
        input_key_to_vars = dict()
        for tileable in dag.topological_iter():
            op: OperatorType = tileable.op
            if isinstance(op, Fetch):
                # Fetch operators are the inputs of the generated code
                input_key_to_vars[
                    op.outputs[0].key
                ] = self._context.get_tileable_variable(tileable)

        result_variables = {
            t.key: self._context.get_tileable_variable(t) for t in dag.results
        }

        return CodeGenResult(
            code="\n".join(code_lines),
            input_key_to_variables=input_key_to_vars,
            output_key_to_variables=result_variables,
            constants=self._context.constants,
            output_key_to_result_infos=self._context.get_tileable_result_infos(),
        )

    def run_pythonpacks(
        self,
        odps_ctx: "ODPSSessionContext",
        python_tag: str,
        is_production: bool = False,
        schedule_id: Optional[str] = None,
        hints: Optional[dict] = None,
        priority: Optional[int] = None,
    ) -> Dict[str, PythonPackOptions]:
        """
        Run one pythonpack per distinct pack key collected from the UDFs.

        Parameters
        ----------
        odps_ctx : ODPSSessionContext
            The context whose ``run_pythonpack`` is invoked.
        python_tag, is_production, schedule_id, hints, priority :
            Passed through to ``odps_ctx.run_pythonpack`` unchanged.

        Returns
        -------
        Dict[str, PythonPackOptions] :
            Instance ids mapped to the pack that was used to start the
            instance. ``pack_instance_id`` of all packs sharing the key is
            set to that instance id.
        """
        # group packs by key, only the first pack of every key is run
        key_to_packs = defaultdict(list)
        for udf in self._context.get_udfs():
            for pack in udf.collect_pythonpack():
                key_to_packs[pack.key].append(pack)

        inst_id_to_req = {}
        for packs in key_to_packs.values():
            pack = packs[0]
            inst = odps_ctx.run_pythonpack(
                requirements=pack.requirements,
                prefer_binary=pack.prefer_binary,
                pre_release=pack.pre_release,
                force_rebuild=pack.force_rebuild,
                python_tag=python_tag,
                is_production=is_production,
                schedule_id=schedule_id,
                hints=hints,
                priority=priority,
            )
            # fulfill instance id of pythonpacks with same keys
            for same_pack in packs:
                same_pack.pack_instance_id = inst.id
            inst_id_to_req[inst.id] = pack
        return inst_id_to_req

    def register_udfs(self, odps_ctx: "ODPSSessionContext"):
        """Register all UDFs of the context, passing True for ``overwrite``."""
        for udf in self._context.get_udfs():
            logger.info("[Session %s] Registering UDF %s", self._session_id, udf.name)
            udf.register(odps_ctx, True)

    def unregister_udfs(self, odps_ctx: "ODPSSessionContext"):
        """Unregister all UDFs of the context."""
        for udf in self._context.get_udfs():
            logger.info("[Session %s] Unregistering UDF %s", self._session_id, udf.name)
            udf.unregister(odps_ctx)

    def get_udfs(self) -> List[AbstractUDF]:
        """Return the UDFs registered in the code context."""
        return self._context.get_udfs()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .config import AttributeDict, option_context, options
|