maxframe 0.1.0b5__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,837 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import inspect
|
|
17
|
+
from collections import OrderedDict
|
|
18
|
+
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
import msgpack
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from ...core import ENTITY_TYPE, enter_mode, is_build_mode, is_kernel_mode
|
|
25
|
+
from ...serialization.serializables import (
|
|
26
|
+
AnyField,
|
|
27
|
+
BoolField,
|
|
28
|
+
DataTypeField,
|
|
29
|
+
Int32Field,
|
|
30
|
+
StringField,
|
|
31
|
+
)
|
|
32
|
+
from ...typing_ import TileableType
|
|
33
|
+
from ...utils import get_item_if_scalar, pd_release_version, tokenize
|
|
34
|
+
from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
35
|
+
from ..utils import (
|
|
36
|
+
build_df,
|
|
37
|
+
build_empty_df,
|
|
38
|
+
build_empty_series,
|
|
39
|
+
build_series,
|
|
40
|
+
parse_index,
|
|
41
|
+
validate_axis,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# pandas < 1.3: when aggregating over multiple levels with numeric_only=True,
# object columns are not dropped by the min / max functions.
_level_reduction_keep_object = pd_release_version[:2] < (1, 3)
# pandas >= 1.3: when DataFrames are reduced into Series, a mixture of float
# and bool results ends up as object dtype.
# NOTE(review): the flag is True for every (major, minor) other than (1, 2),
# which also covers releases older than 1.2 -- confirm this is intended.
_reduce_bool_as_object = pd_release_version[:2] != (1, 2)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DataFrameReductionOperator(DataFrameOperator):
    """Base operator holding the arguments shared by reduction operators."""

    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=True)
    level = AnyField("level", default=None)
    numeric_only = BoolField("numeric_only", default=None)
    bool_only = BoolField("bool_only", default=None)
    min_count = Int32Field("min_count", default=None)
    method = StringField("method", default=None)

    dtype = DataTypeField("dtype", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # the public ``output_types`` argument is forwarded as ``_output_types``
        base_kw = dict(gpu=gpu, sparse=sparse, _output_types=output_types)
        super().__init__(**base_kw, **kw)

    @property
    def is_atomic(self):
        """Always False for this base class."""
        return False

    def get_reduction_args(self, axis=None):
        """
        Collect the keyword arguments describing this reduction.

        ``axis`` is only reported when the first input has more than one
        dimension. Entries whose value is None are left out of the result.
        """
        candidates = [("skipna", self.skipna)]
        if self.inputs and self.inputs[0].ndim > 1:
            candidates.append(("axis", axis))
        candidates.append(("numeric_only", self.numeric_only))
        candidates.append(("bool_only", self.bool_only))
        return {name: val for name, val in candidates if val is not None}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class DataFrameCumReductionOperator(DataFrameOperator):
    """Base operator holding the arguments of cumulative reductions."""

    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=None)

    dtype = DataTypeField("dtype", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # the public ``output_types`` argument is forwarded as ``_output_types``
        base_kw = dict(gpu=gpu, sparse=sparse, _output_types=output_types)
        super().__init__(**base_kw, **kw)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _default_agg_fun(value, func_name=None, **kw):
|
|
92
|
+
if value.ndim == 1:
|
|
93
|
+
kw.pop("bool_only", None)
|
|
94
|
+
kw.pop("numeric_only", None)
|
|
95
|
+
return getattr(value, func_name)(**kw)
|
|
96
|
+
else:
|
|
97
|
+
return getattr(value, func_name)(**kw)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@functools.lru_cache(100)
def _get_series_reduction_dtype(
    dtype,
    func_name,
    axis=None,
    bool_only=False,
    skipna=True,
    numeric_only=False,
):
    """
    Infer the dtype produced by reducing a series of ``dtype`` with ``func_name``.

    The reduction is run on a sample series created by ``build_series`` and
    the dtype of the outcome, wrapped into a ``pd.Series``, is returned.
    Results are memoized per argument combination.
    """
    sample = build_series(dtype=dtype, ensure_string=True)
    if func_name in ("count", "nunique"):
        # these two are invoked without any keyword arguments
        outcome = getattr(sample, func_name)()
    elif func_name in ("all", "any"):
        outcome = getattr(sample, func_name)(axis=axis, bool_only=bool_only)
    elif func_name == "size":
        # ``size`` is an attribute, not a method
        outcome = sample.size
    elif func_name == "str_concat":
        outcome = pd.Series([sample.str.cat()])
    else:
        reducer = getattr(sample, func_name)
        outcome = reducer(axis=axis, skipna=skipna, numeric_only=numeric_only)
    return pd.Series(outcome).dtype
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@functools.lru_cache(100)
def _get_df_reduction_dtype(
    dtype, func_name, axis=None, bool_only=False, skipna=False, numeric_only=False
):
    """
    Infer the dtype produced by reducing a one-column frame of ``dtype``.

    The reduction is run on a sample frame derived from ``build_series``.
    Returns None when the reduced result is empty, otherwise its dtype.
    Results are memoized per argument combination.
    """
    sample_df = build_series(dtype=dtype, ensure_string=True).to_frame()
    if func_name == "str_concat":
        outcome = sample_df.apply(lambda s: s.str.cat(), axis=axis)
    else:
        # each family of reductions accepts a different set of keywords
        if func_name == "count":
            call_kw = dict(axis=axis, numeric_only=numeric_only)
        elif func_name == "nunique":
            call_kw = dict(axis=axis)
        elif func_name in ("all", "any"):
            call_kw = dict(axis=axis, bool_only=bool_only)
        else:
            call_kw = dict(axis=axis, skipna=skipna, numeric_only=numeric_only)
        outcome = getattr(sample_df, func_name)(**call_kw)
    return outcome.dtype if len(outcome) != 0 else None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class DataFrameReductionMixin(DataFrameOperatorMixin):
    """
    Mixin implementing the call logic of reduction operators.

    The host operator is expected to provide ``_func_name`` and the reduction
    arguments (``axis``, ``level``, ``skipna``, ``numeric_only``,
    ``bool_only``, ``method``) that the methods below read.
    """

    @classmethod
    def get_reduction_callable(cls, op):
        """
        Build a callable applying the reduction described by ``op``.

        The callable wraps ``_default_agg_fun`` with the operator's function
        name and its non-None ``skipna`` / ``numeric_only`` / ``bool_only``
        arguments, and carries the function name as ``__name__``.
        """
        func_name = getattr(op, "_func_name")
        kw = dict(
            skipna=op.skipna, numeric_only=op.numeric_only, bool_only=op.bool_only
        )
        # arguments left as None are not forwarded to the reduction method
        kw = {k: v for k, v in kw.items() if v is not None}
        fun = functools.partial(_default_agg_fun, func_name=func_name, **kw)
        fun.__name__ = func_name
        return fun

    def _call_groupby_level(self, df, level):
        """Express a reduction with ``level`` as ``groupby(level=...).agg(...)``."""
        return df.groupby(level=level).agg(
            self.get_reduction_callable(self), method=self.method
        )

    @staticmethod
    def _find_common_dtype(dtypes):
        """
        Return a dtype to which all numpy ``dtypes`` can be promoted.

        ``np.find_common_type`` is used when the installed NumPy still has it,
        so results are unchanged on those versions. It was deprecated in
        NumPy 1.25 and removed in NumPy 2.0; there ``np.result_type`` is used
        instead, and dtypes that cannot be promoted together yield object.
        """
        legacy_find = getattr(np, "find_common_type", None)
        if legacy_find is not None:
            return legacy_find(dtypes, [])
        try:
            return np.result_type(*dtypes)
        except TypeError:
            # promotion errors are TypeError subclasses
            return np.dtype("O")

    def _call_dataframe(self, df):
        """
        Build the output of reducing DataFrame ``df``.

        Normalizes and stores ``self.axis``, infers the reduced columns and
        dtype, then returns either a level-wise groupby aggregation (when
        ``level`` is set) or a new series.

        Raises:
            NotImplementedError: if ``level`` is given together with axis 1.
        """
        axis = getattr(self, "axis", None) or 0
        level = getattr(self, "level", None)
        skipna = getattr(self, "skipna", True)
        numeric_only = getattr(self, "numeric_only", None)
        bool_only = getattr(self, "bool_only", None)
        self.axis = axis = validate_axis(axis, df)
        func_name = getattr(self, "_func_name")

        if level is not None and axis == 1:
            raise NotImplementedError("Not support specify level for axis==1")

        if func_name == "size":
            # placeholder series, only used for its index and dtype
            reduced = pd.Series(
                np.zeros(df.shape[1 - axis]),
                index=df.dtypes.index if axis == 0 else None,
            )
            reduced_cols = list(reduced.index)
            reduced_dtype = reduced.dtype
        elif func_name == "custom_reduction":
            # run the custom reduction on a mock frame to learn its output
            empty_df = build_df(df, ensure_string=True)
            reduced = getattr(self, "custom_reduction").__call_agg__(empty_df)
            reduced_cols = list(reduced.index)
            reduced_dtype = reduced.dtype
        else:
            reduced_cols, dtypes = [], []
            for col, src_dt in df.dtypes.items():
                dt = _get_df_reduction_dtype(
                    src_dt,
                    func_name,
                    axis=axis,
                    bool_only=bool_only,
                    skipna=skipna,
                    numeric_only=numeric_only,
                )
                if dt is not None:
                    reduced_cols.append(col)
                    dtypes.append(dt)
                elif (
                    _level_reduction_keep_object
                    and numeric_only
                    and level is not None
                    and func_name in ("min", "max")
                    and src_dt == np.dtype(object)
                ):  # pragma: no cover
                    reduced_cols.append(col)
                    dtypes.append(np.dtype(object))
            if len(dtypes) == 0:
                reduced_dtype = np.dtype("O")
            elif all(dt == dtypes[0] for dt in dtypes):
                reduced_dtype = dtypes[0]
            else:
                # as we already bypassed dtypes with same values,
                # when has_mixed_bool is True, there are other dtypes
                # other than bool.
                has_mixed_bool = any(dt == np.dtype(bool) for dt in dtypes)
                if _reduce_bool_as_object and has_mixed_bool:
                    reduced_dtype = np.dtype("O")
                elif not all(isinstance(dt, np.dtype) for dt in dtypes):
                    # todo currently we return mixed dtypes as np.dtype('O').
                    #  handle pandas Dtypes in the future more carefully.
                    reduced_dtype = np.dtype("O")
                else:
                    reduced_dtype = self._find_common_dtype(dtypes)

        if level is not None:
            return self._call_groupby_level(df[reduced_cols], level)

        if axis == 0:
            # one output entry per retained column
            reduced_shape = (len(reduced_cols),)
            reduced_index_value = parse_index(pd.Index(reduced_cols), store_data=True)
        else:
            # one output entry per row
            reduced_shape = (df.shape[0],)
            reduced_index_value = parse_index(pd.RangeIndex(-1))

        return self.new_series(
            [df],
            shape=reduced_shape,
            dtype=reduced_dtype,
            index_value=reduced_index_value,
        )

    def _call_series(self, series):
        """
        Build the output of reducing ``series``.

        Normalizes and stores ``self.axis``. Returns a level-wise groupby
        aggregation when ``level`` is set, otherwise a new scalar whose dtype
        is inferred from a sample reduction.
        """
        level = getattr(self, "level", None)
        axis = getattr(self, "axis", None)
        skipna = getattr(self, "skipna", True)
        numeric_only = getattr(self, "numeric_only", None)
        bool_only = getattr(self, "bool_only", None)
        self.axis = axis = validate_axis(axis or 0, series)
        func_name = getattr(self, "_func_name")

        if level is not None:
            return self._call_groupby_level(series, level)

        if func_name == "custom_reduction":
            # run the custom reduction on a mock series to learn its dtype
            empty_series = build_series(series, ensure_string=True)
            result_scalar = getattr(self, "custom_reduction").__call_agg__(empty_series)
            if hasattr(result_scalar, "to_pandas"):  # pragma: no cover
                result_scalar = result_scalar.to_pandas()
            result_dtype = pd.Series(result_scalar).dtype
        else:
            result_dtype = _get_series_reduction_dtype(
                series.dtype,
                func_name,
                axis=axis,
                bool_only=bool_only,
                numeric_only=numeric_only,
                skipna=skipna,
            )
        return self.new_scalar([series], dtype=result_dtype)

    def __call__(self, a):
        """
        Apply the reduction to ``a``.

        In kernel mode, when the operator is not atomic, the plain reduction
        callable is applied to ``a`` directly. Otherwise the call is routed
        to the DataFrame or the series implementation by the type of ``a``.
        """
        if is_kernel_mode() and not getattr(self, "is_atomic", False):
            return self.get_reduction_callable(self)(a)

        if isinstance(a, DATAFRAME_TYPE):
            return self._call_dataframe(a)
        else:
            return self._call_series(a)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class DataFrameCumReductionMixin(DataFrameOperatorMixin):
    """Mixin implementing the call logic of cumulative reduction operators."""

    def _call_dataframe(self, df):
        """Build the output DataFrame, inferring dtypes on an empty frame."""
        normalized_axis = validate_axis(getattr(self, "axis", None) or 0, df)
        self.axis = normalized_axis

        # run the cumulative function on an empty frame to learn output dtypes
        mock_df = build_empty_df(df.dtypes)
        cum_func = getattr(mock_df, getattr(self, "_func_name"))
        mock_result = cum_func(axis=normalized_axis)
        return self.new_dataframe(
            [df],
            shape=df.shape,
            dtypes=mock_result.dtypes,
            index_value=df.index_value,
            columns_value=df.columns_value,
        )

    def _call_series(self, series):
        """Build the output series, which keeps the input's metadata."""
        raw_axis = getattr(self, "axis", None) or 0
        # only the "index" alias is translated for series
        self.axis = 0 if raw_axis == "index" else raw_axis

        return self.new_series(
            [series],
            shape=series.shape,
            dtype=series.dtype,
            name=series.name,
            index_value=series.index_value,
        )

    def __call__(self, a):
        """Route the call by the type of ``a``."""
        handler = (
            self._call_dataframe
            if isinstance(a, DATAFRAME_TYPE)
            else self._call_series
        )
        return handler(a)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class CustomReduction:
    """
    Base class for user-defined reductions made of pre / agg / post stages.

    Subclasses are expected to override ``agg`` and may override ``pre`` and
    ``post``.
    """

    name: Optional[str]
    output_limit: Optional[int]
    kwds: Dict

    # set to True when pre() already performs aggregation
    pre_with_agg = False

    def __init__(self, name=None, is_gpu=None):
        self.name = name or "<custom>"
        self.output_limit = 1
        self._is_gpu = is_gpu

    @property
    def __name__(self):
        """Expose ``name`` the way functions expose ``__name__``."""
        return self.name

    def __call__(self, value):
        """Build a MaxFrame result for entities, otherwise reduce ``value``."""
        if not isinstance(value, ENTITY_TYPE):
            return self.__call_agg__(value)

        from .custom_reduction import build_custom_reduction_result

        return build_custom_reduction_result(value, self)

    def __call_agg__(self, value):
        """Run pre(), then agg() unless pre() already aggregates, then post()."""
        staged = self.pre(value)
        if not isinstance(staged, tuple):
            staged = (staged,)
        # update output limit into actual size
        self.output_limit = len(staged)

        if self.pre_with_agg:
            # pre() already aggregated, skip agg()
            return self.post(*staged)

        staged = self.agg(*staged)
        if not isinstance(staged, tuple):
            staged = (staged,)
        return self.post(*staged)

    def is_gpu(self):
        """Return the GPU flag, which is always False in build mode."""
        if is_build_mode():
            return False
        return self._is_gpu

    def pre(self, value):  # noqa: R0201  # pylint: disable=no-self-use
        """Default pre stage: wrap ``value`` into a one-element tuple."""
        return (value,)

    def agg(self, *values):  # noqa: R0201  # pylint: disable=no-self-use
        """Aggregation stage; not implemented in the base class."""
        raise NotImplementedError

    def post(self, *value):  # noqa: R0201  # pylint: disable=no-self-use
        """Default post stage: unwrap the single aggregated value."""
        assert len(value) == 1
        return value[0]

    def __maxframe_tokenize__(self):
        """Return the cloudpickle serialization of this object as its token."""
        import cloudpickle

        return cloudpickle.dumps(self)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class ReductionPreStep(NamedTuple):
    """Description of a step evaluated on the input before aggregation."""

    # key of the tileable the step starts from
    input_key: str
    # key of the tileable produced by the step and fed into an aggregation
    output_key: str
    # columns recorded for the step; None when no column list is recorded
    columns: Optional[List[str]]
    # msgpack-serialized function IDL of the step
    func_idl: bytes
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class ReductionAggStep(NamedTuple):
    """Description of a single aggregation inside a compiled reduction."""

    # key of the tileable the aggregation consumes
    input_key: str
    # name of the function the step was compiled from
    raw_func_name: Optional[str]
    # function name of the aggregating operator
    step_func_name: Optional[str]
    # function name used for the map stage
    map_func_name: Optional[str]
    # function name used for the agg stage
    agg_func_name: Optional[str]
    # the CustomReduction being compiled, or None for other functions
    custom_reduction: Optional[CustomReduction]
    # key of the tileable produced by the aggregation
    output_key: str
    # number of outputs of the aggregation
    output_limit: int
    # keyword arguments of the reduction
    kwds: Dict[str, Any]
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
class ReductionPostStep(NamedTuple):
    """Description of a step evaluated on aggregation results."""

    # keys of the tileables the step consumes
    input_keys: List[str]
    # key of the tileable produced by the step
    output_key: str
    # name of the function the step was compiled from
    func_name: str
    # columns recorded for the step; None when no column list is recorded
    columns: Optional[List[str]]
    # msgpack-serialized function IDL of the step
    func_idl: bytes
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class ReductionSteps(NamedTuple):
    """Pre-aggregation, aggregation and post-aggregation steps of a reduction."""

    # steps evaluated before aggregation
    pre_funcs: List[ReductionPreStep]
    # aggregation steps
    agg_funcs: List[ReductionAggStep]
    # steps evaluated after aggregation
    post_funcs: List[ReductionPostStep]
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# lookup table translating numpy arithmetic operator names into the
# corresponding pandas method names
_func_name_converts = dict(
    greater="gt",
    greater_equal="ge",
    less="lt",
    less_equal="le",
    equal="eq",
    not_equal="ne",
    true_divide="truediv",
    floor_divide="floordiv",
    power="pow",
)
# module-level cache of compiled functions, keyed by the function token
_func_compile_cache = dict()  # type: Dict[str, ReductionSteps]

# value types that are written into the IDL as they are
_idl_primitive_types = (
    type(None),
    int,
    float,
    bool,
    str,
    bytes,
    np.integer,
    np.bool_,
)

# operation names used in IDL statements
IN_VAR_IDL_OP = "in_var"
OUT_VAR_IDL_OP = "out_var"
MASK_VAR_OP = "mask"
WHERE_VAR_OP = "where"
LET_VAR_OP = "let"
# prefixes put before the function name of unary / binary operations
UNARY_IDL_OP_PREFIX = "unary:"
BINARY_IDL_OP_PREFIX = "bin:"
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
class ReductionCompiler:
|
|
450
|
+
def __init__(self, axis=0):
|
|
451
|
+
self._axis = axis
|
|
452
|
+
|
|
453
|
+
self._key_to_tileable = dict()
|
|
454
|
+
self._output_tileables = []
|
|
455
|
+
self._lambda_counter = 0
|
|
456
|
+
self._custom_counter = 0
|
|
457
|
+
self._func_cache = dict()
|
|
458
|
+
|
|
459
|
+
self._compiled_funcs = []
|
|
460
|
+
self._output_key_to_pre_steps = dict()
|
|
461
|
+
self._output_key_to_pre_cols = dict()
|
|
462
|
+
self._output_key_to_agg_steps = dict()
|
|
463
|
+
self._output_key_to_post_steps = dict()
|
|
464
|
+
self._output_key_to_post_cols = dict()
|
|
465
|
+
|
|
466
|
+
@classmethod
def _check_function_valid(cls, func):
    """
    Make sure ``func`` does not refer to MaxFrame objects defined outside it.

    ``functools.partial`` objects are checked through their wrapped function
    and ``CustomReduction`` instances are accepted without inspection.

    Raises:
        ValueError: if a global or closure variable used by ``func`` is a
            MaxFrame object.
    """
    if isinstance(func, functools.partial):
        return cls._check_function_valid(func.func)
    if isinstance(func, CustomReduction):
        return

    code = func.__code__
    # globals named in the code object; missing names map to None
    referenced = {name: func.__globals__.get(name) for name in code.co_names}
    closure = func.__closure__
    if closure:
        referenced.update(
            (name, cell.cell_contents)
            for name, cell in zip(code.co_freevars, closure)
        )
    # external MaxFrame objects shall not be referenced
    for var_name, val in referenced.items():
        if isinstance(val, ENTITY_TYPE):
            raise ValueError(
                f"Variable {var_name} used by {func.__name__} "
                "cannot be a MaxFrame object"
            )
|
|
489
|
+
|
|
490
|
+
@staticmethod
|
|
491
|
+
def _update_col_dict(col_dict: Dict, key: str, cols: List):
|
|
492
|
+
if key in col_dict:
|
|
493
|
+
existing_cols = col_dict[key]
|
|
494
|
+
if existing_cols is not None:
|
|
495
|
+
existing_col_set = set(existing_cols)
|
|
496
|
+
col_dict[key].extend([c for c in cols if c not in existing_col_set])
|
|
497
|
+
else:
|
|
498
|
+
col_dict[key] = list(cols) if cols is not None else None
|
|
499
|
+
|
|
500
|
+
def add_function(self, func, ndim, cols=None, func_name=None):
    """
    Compile ``func`` and register its steps with this compiler.

    Args:
        func: function, built-in or ``CustomReduction`` to compile.
        ndim: number of dimensions of the data the function is applied to.
        cols: columns the function applies to; ignored unless axis is 0.
        func_name: name to register the function under; defaults to the
            function's ``__name__``.

    Raises:
        ValueError: if ``func`` is a built-in without a registered
            aggregation function.
    """
    from .aggregation import _agg_functions

    # column lists are only kept when reducing along axis 0
    if cols is None or self._axis != 0:
        cols = None

    func_name = func_name or getattr(func, "__name__", None)
    if func_name is None or func_name == "<lambda>":
        # number anonymous functions so that they can be told apart
        func_name = f"<lambda_{self._lambda_counter}>"
        self._lambda_counter += 1
    if func_name == "<custom>":
        func_name = f"<custom_{self._custom_counter}>"
        self._custom_counter += 1

    if inspect.isbuiltin(func):
        raw_func_name = getattr(func, "__name__", "N/A")
        if raw_func_name not in _agg_functions:
            raise ValueError(f"Unexpected built-in function {raw_func_name}")
        func = _agg_functions[raw_func_name]

    compile_result = self._compile_function(func, func_name, ndim=ndim)
    self._compiled_funcs.append(compile_result)

    for pre_step in compile_result.pre_funcs:
        out_key = pre_step.output_key
        self._output_key_to_pre_steps[out_key] = pre_step
        self._update_col_dict(self._output_key_to_pre_cols, out_key, cols)

    for agg_step in compile_result.agg_funcs:
        self._output_key_to_agg_steps[agg_step.output_key] = agg_step

    for post_step in compile_result.post_funcs:
        out_key = post_step.output_key
        self._output_key_to_post_steps[out_key] = post_step
        self._update_col_dict(self._output_key_to_post_cols, out_key, cols)
|
|
533
|
+
|
|
534
|
+
@staticmethod
def _build_mock_return_object(func, input_dtype, ndim):
    """
    Call ``func`` on a mock MaxFrame object and return what it produces.

    A series built from an empty series of ``input_dtype`` is used when
    ``ndim`` is 1; otherwise a DataFrame with two columns "A" and "B" of
    that dtype is used.
    """
    from ..initializer import DataFrame as MaxDataFrame
    from ..initializer import Series as MaxSeries

    mock_dtype = np.dtype(input_dtype)
    if ndim == 1:
        mock_obj = MaxSeries(build_empty_series(mock_dtype))
    else:
        column_dtypes = pd.Series([mock_dtype] * 2, index=["A", "B"])
        mock_obj = MaxDataFrame(build_empty_df(column_dtypes))

    # calc target tileable to generate DAG
    with enter_mode(kernel=True, build=False):
        return func(mock_obj)
|
|
551
|
+
|
|
552
|
+
@enter_mode(build=True)
def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
    """
    Compile ``func`` into pre-aggregation, aggregation and post steps.

    The function is called on a mock object to obtain a tileable graph,
    which is validated and then split around its atomic aggregations.
    Results are cached at module level by the token of the arguments.

    Raises:
        ValueError: if ``func`` references outer MaxFrame objects, does not
            return a MaxFrame object, does not reduce dimensions, or uses
            an unsupported operator.
    """
    from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
    from ...tensor.base import TensorWhere
    from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
    from ..datasource.dataframe import DataFrameDataSource
    from ..datasource.series import SeriesDataSource
    from ..indexing.where import DataFrameWhere

    func_token = tokenize(func, self._axis, func_name, ndim)
    if func_token in _func_compile_cache:
        return _func_compile_cache[func_token]

    custom_reduction = None
    if isinstance(func, CustomReduction):
        custom_reduction = func

    self._check_function_valid(func)

    try:
        func_ret = self._build_mock_return_object(func, float, ndim=ndim)
    except (TypeError, AttributeError):
        # we may encounter lambda x: x.str.cat(...), use an object series to test
        func_ret = self._build_mock_return_object(func, object, ndim=1)
    output_limit = getattr(func, "output_limit", None) or 1

    if not isinstance(func_ret, ENTITY_TYPE):
        raise ValueError(
            f"Custom function should return a MaxFrame object, not {type(func_ret)}"
        )
    if func_ret.ndim >= ndim:
        raise ValueError("Function not a reduction")

    # operators accepted after aggregation; data sources are additionally
    # accepted before aggregation
    elementwise_op_types = (
        DataFrameUnaryOp,
        DataFrameBinOp,
        TensorUnaryOp,
        TensorBinOp,
        TensorWhere,
        DataFrameWhere,
    )
    pre_agg_op_types = elementwise_op_types + (
        DataFrameDataSource,
        SeriesDataSource,
    )

    agg_graph = func_ret.build_graph()
    agg_tileables = set(t for t in agg_graph if getattr(t.op, "is_atomic", False))
    # check operators before aggregation
    for t in agg_graph.dfs(
        list(agg_tileables), visit_predicate="all", reverse=True
    ):
        if t in agg_tileables:
            continue
        if not isinstance(t.op, pre_agg_op_types):
            raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")
    # check operators after aggregation
    for t in agg_graph.dfs(list(agg_tileables), visit_predicate="all"):
        if t in agg_tileables:
            continue
        if not isinstance(t.op, elementwise_op_types):
            raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")

    pre_funcs, agg_funcs, post_funcs = [], [], []
    visited_inputs = set()
    # collect aggregations and their inputs
    for t in agg_tileables:
        agg_input = t.inputs[0]
        agg_input_key = agg_input.key

        # collect agg names: counting functions are combined by summation
        step_func_name = getattr(t.op, "_func_name")
        map_func_name = step_func_name
        if step_func_name in ("count", "size"):
            agg_func_name = "sum"
        else:
            agg_func_name = step_func_name

        # build agg description
        agg_funcs.append(
            ReductionAggStep(
                input_key=agg_input_key,
                raw_func_name=func_name,
                step_func_name=step_func_name,
                map_func_name=map_func_name,
                agg_func_name=agg_func_name,
                custom_reduction=custom_reduction,
                output_key=t.key,
                output_limit=output_limit,
                kwds=t.op.get_reduction_args(axis=self._axis),
            )
        )

        # collect agg input and build function, once per distinct input
        if agg_input_key in visited_inputs:
            continue
        visited_inputs.add(agg_input_key)
        initial_inputs = list(agg_input.build_graph().iter_indep())
        assert len(initial_inputs) == 1
        input_key = initial_inputs[0].key

        pre_idl, _ = self._generate_function_idl(agg_input)
        pre_funcs.append(
            ReductionPreStep(input_key, agg_input_key, None, msgpack.dumps(pre_idl))
        )

    # collect function output after agg
    post_idl, input_keys = self._generate_function_idl(func_ret)
    post_funcs.append(
        ReductionPostStep(
            input_keys, func_ret.key, func_name, None, msgpack.dumps(post_idl)
        )
    )

    # drop the oldest entry once the cache has grown beyond 100 items
    if len(_func_compile_cache) > 100:  # pragma: no cover
        oldest_token = next(iter(_func_compile_cache))
        _func_compile_cache.pop(oldest_token)
    compiled = ReductionSteps(pre_funcs, agg_funcs, post_funcs)
    _func_compile_cache[func_token] = compiled
    return compiled
|
|
670
|
+
|
|
671
|
+
def _generate_function_idl(self, out_tileable: TileableType) -> Tuple[List, List]:
    """
    Generate function IDL from tileable DAG

    IDL Format: [
        ["in_var", "input_var_name"],
        ["op", "op_output_var", ["op_arg1", "op_arg2"], {"op_key1": "op_key2"}],
        ["out_var", "output_var_name"],
    ]

    Returns a tuple of the IDL statement list and the list of keys of the
    input tileables, in the order their variables were created.

    Raises NotImplementedError for operators or argument values that cannot
    be expressed in the IDL.
    """
    from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
    from ...tensor.base import TensorWhere
    from ...tensor.datasource import Scalar
    from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
    from ..datasource.dataframe import DataFrameDataSource
    from ..datasource.series import SeriesDataSource
    from ..indexing.where import DataFrameWhere

    # input tileable key -> variable name, in order of first appearance
    input_key_to_var = OrderedDict()
    # every visited tileable key -> variable name (inputs included)
    local_key_to_var = dict()
    # statements emitted for non-input tileables
    idl_lines = []

    # tileables produced by these operators become inputs of the function
    input_op_types = (
        DataFrameDataSource,
        SeriesDataSource,
        DataFrameReductionOperator,
    )

    def _gen_expr_str(t):
        # generate code for t
        if t.key in local_key_to_var:
            return

        if isinstance(t.op, input_op_types):
            # tileable is an input arg, build a function variable
            if t.key not in input_key_to_var:  # pragma: no branch
                input_key_to_var[t.key] = local_key_to_var[
                    t.key
                ] = f"invar{len(input_key_to_var)}"
        else:
            # emit statements for all inputs first so their variables exist
            for inp in t.inputs:
                _gen_expr_str(inp)

            # the variable index is the number of variables named so far
            var_name = local_key_to_var[t.key] = f"var{len(local_key_to_var)}"
            keys_to_vars = {inp.key: local_key_to_var[inp.key] for inp in t.inputs}

            def _interpret_var(v):
                v = get_item_if_scalar(v)
                # get representation for variables
                if hasattr(v, "key"):
                    return keys_to_vars[v.key]
                elif isinstance(v, _idl_primitive_types):
                    return v
                else:
                    raise NotImplementedError(
                        f"Type {type(v)} currently not interpretable"
                    )

            func_name = getattr(t.op, "_func_name", None)
            if func_name is None:
                func_name = getattr(t.op, "_bit_func_name", None)
            # handle function name differences between numpy and pandas arithmetic ops
            if func_name in _func_name_converts:
                func_name = _func_name_converts[func_name]

            # build given different op types
            if isinstance(t.op, (DataFrameUnaryOp, TensorUnaryOp)):
                val = _interpret_var(t.inputs[0])
                statements = [
                    [UNARY_IDL_OP_PREFIX + func_name, var_name, [val], {}]
                ]
            elif isinstance(t.op, (DataFrameBinOp, TensorBinOp)):
                lhs, rhs = t.op.lhs, t.op.rhs
                # an axis is only passed when operands differ in dimensions
                op_axis = (
                    1 - self._axis
                    if hasattr(lhs, "ndim")
                    and hasattr(rhs, "ndim")
                    and lhs.ndim != rhs.ndim
                    else None
                )
                lhs = _interpret_var(lhs)
                rhs = _interpret_var(rhs)
                axis_arg = {"axis": op_axis} if op_axis is not None else {}
                # NOTE(review): this statement has five items (an empty dict
                # followed by the axis dict), unlike the four-item form in
                # the docstring -- confirm what the IDL consumer expects.
                statements = [
                    [
                        BINARY_IDL_OP_PREFIX + func_name,
                        var_name,
                        [lhs, rhs],
                        {},
                        axis_arg,
                    ]
                ]
            elif isinstance(t.op, TensorWhere):
                cond = _interpret_var(t.op.condition)
                x = _interpret_var(t.op.x)
                y = _interpret_var(t.op.y)
                statements = [[WHERE_VAR_OP, var_name, [cond, x, y], {}]]
            elif isinstance(t.op, DataFrameWhere):
                # replace_true selects the "mask" operation instead of "where"
                func_name = MASK_VAR_OP if t.op.replace_true else WHERE_VAR_OP
                inp = _interpret_var(t.op.input)
                cond = _interpret_var(t.op.cond)
                other = _interpret_var(t.op.other)
                statements = [
                    [
                        func_name,
                        var_name,
                        [cond, inp, other],
                        {"axis": t.op.axis, "level": t.op.level},
                    ]
                ]
            elif isinstance(t.op, Scalar):
                # for scalar inputs of other operators
                data = _interpret_var(t.op.data)
                statements = [[LET_VAR_OP, var_name, [data]]]
            else:  # pragma: no cover
                raise NotImplementedError(
                    f"Does not support aggregating on {type(t.op)}"
                )

            idl_lines.extend(statements)

    _gen_expr_str(out_tileable)

    # declare inputs first, then the body, then the output variable
    input_idls = [
        [IN_VAR_IDL_OP, var_name] for var_name in input_key_to_var.values()
    ]
    output_idls = [[OUT_VAR_IDL_OP, local_key_to_var[out_tileable.key]]]
    return input_idls + idl_lines + output_idls, list(input_key_to_var.keys())
|
|
799
|
+
|
|
800
|
+
def compile(self) -> ReductionSteps:
    """
    Assemble the steps of all added functions into one ``ReductionSteps``.

    Pre steps receive the columns collected for their output key. A post
    step gets None as columns when its collected columns equal the set of
    columns referred to by the pre steps. When only one lambda or custom
    reduction was added, its numbered name is turned back into
    ``<lambda>`` / ``<custom>``.
    """
    pre_funcs = []
    referred_cols = set()
    for key, pre_step in self._output_key_to_pre_steps.items():
        pre_cols = self._output_key_to_pre_cols[key]
        if pre_cols:
            referred_cols.update(pre_cols)
        pre_funcs.append(pre_step._replace(columns=pre_cols))

    agg_funcs = list(self._output_key_to_agg_steps.values())

    post_funcs = []
    for key, post_step in self._output_key_to_post_steps.items():
        post_cols = self._output_key_to_post_cols[key]
        if post_cols and set(post_cols) == set(referred_cols):
            # the step covers every referred column, no restriction needed
            post_cols = None

        func_name = post_step.func_name
        if self._lambda_counter == 1 and func_name == "<lambda_0>":
            func_name = "<lambda>"
        elif self._custom_counter == 1 and func_name == "<custom_0>":
            func_name = "<custom>"

        post_funcs.append(
            post_step._replace(func_name=func_name, columns=post_cols)
        )

    return ReductionSteps(pre_funcs, agg_funcs, post_funcs)
|