maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Callable, Dict
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from ... import opcodes
|
|
23
|
+
from ...core import ENTITY_TYPE, OutputType
|
|
24
|
+
from ...serialization.serializables import (
|
|
25
|
+
AnyField,
|
|
26
|
+
DictField,
|
|
27
|
+
Int32Field,
|
|
28
|
+
Int64Field,
|
|
29
|
+
ListField,
|
|
30
|
+
StringField,
|
|
31
|
+
)
|
|
32
|
+
from ...utils import lazy_import, pd_release_version
|
|
33
|
+
from ..core import GROUPBY_TYPE
|
|
34
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
35
|
+
from ..reduction.aggregation import (
|
|
36
|
+
compile_reduction_funcs,
|
|
37
|
+
is_funcs_aggregate,
|
|
38
|
+
normalize_reduction_funcs,
|
|
39
|
+
)
|
|
40
|
+
from ..utils import is_cudf, parse_index
|
|
41
|
+
|
|
42
|
+
# cupy / cudf are bound through lazy_import -- presumably so that the GPU
# libraries are only loaded on first use (TODO confirm lazy_import semantics).
cp = lazy_import("cupy", rename="cp")
cudf = lazy_import("cudf")

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): the two thresholds below are not referenced in the visible part
# of this module -- confirm their consumers before changing the values.
CV_THRESHOLD = 0.2
MEAN_RATIO_THRESHOLD = 2 / 3
# True when the (major, minor) part of the pandas release is greater than (1, 0).
# build_mock_agg_result() reads this flag to decide whether a ValueError raised
# by the mock aggregation is re-raised or retried with as_index=True.
_support_get_group_without_as_index = pd_release_version[:2] > (1, 0)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SizeRecorder:
    """Collect paired size observations: one raw value and one aggregated value."""

    def __init__(self):
        # Two parallel lists; entry i of each list belongs to the same record() call.
        self._raw_records, self._agg_records = [], []

    def record(self, raw_record: int, agg_record: int):
        """Append one (raw, aggregated) observation."""
        for bucket, value in (
            (self._raw_records, raw_record),
            (self._agg_records, agg_record),
        ):
            bucket.append(value)

    def get(self):
        """Return ``(raw_records, agg_records)``; the internal lists are not copied."""
        raw, aggregated = self._raw_records, self._agg_records
        return raw, aggregated
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Lookup table: aggregation name -> callable that invokes the reduction on its
# argument. Every entry calls the same-named method, except "size", which calls
# ``_reduction_size()``. The spread-style reductions accept ``ddof`` (default 1)
# and the shape-style reductions accept ``bias`` (default False).
_agg_functions = dict(
    sum=lambda x: x.sum(),
    prod=lambda x: x.prod(),
    product=lambda x: x.product(),
    min=lambda x: x.min(),
    max=lambda x: x.max(),
    all=lambda x: x.all(),
    any=lambda x: x.any(),
    count=lambda x: x.count(),
    size=lambda x: x._reduction_size(),
    mean=lambda x: x.mean(),
    var=lambda x, ddof=1: x.var(ddof=ddof),
    std=lambda x, ddof=1: x.std(ddof=ddof),
    sem=lambda x, ddof=1: x.sem(ddof=ddof),
    skew=lambda x, bias=False: x.skew(bias=bias),
    kurt=lambda x, bias=False: x.kurt(bias=bias),
    kurtosis=lambda x, bias=False: x.kurtosis(bias=bias),
    nunique=lambda x: x.nunique(),
)
# NOTE(review): not referenced in the visible part of this module -- presumably
# a placeholder column name used when a series is handled as a frame; confirm.
_series_col_name = "col_name"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _patch_groupby_kurt():
    """Add ``kurt`` / ``kurtosis`` to pandas groupby classes when they are missing.

    Nothing is changed when ``DataFrameGroupBy`` already exposes ``kurt``.
    ``AttributeError`` and ``ImportError`` raised while patching are ignored.
    """
    try:
        from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy

        if hasattr(DataFrameGroupBy, "kurt"):  # pragma: no branch
            return

        def _kurt_by_frame(a, *args, **kwargs):
            # Compute kurtosis through a one-column frame and take its only value.
            result = a.to_frame().kurt(*args, **kwargs).iloc[0]
            if is_cudf(result):  # pragma: no cover
                result = result.copy()
            return result

        def _group_kurt(x, *args, **kwargs):
            # The frame-based reducer is used only when ``numeric_only`` is given
            # explicitly; otherwise ``pd.Series.kurt`` is applied per group.
            if kwargs.get("numeric_only") is None:
                reducer = pd.Series.kurt
            else:
                reducer = _kurt_by_frame
            return x.agg(functools.partial(reducer, *args, **kwargs))

        for groupby_cls in (DataFrameGroupBy, SeriesGroupBy):
            groupby_cls.kurt = groupby_cls.kurtosis = _group_kurt
    except (AttributeError, ImportError):  # pragma: no cover
        pass


# Apply the patch once at import time, then drop the helper from the namespace.
_patch_groupby_kurt()
del _patch_groupby_kurt
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def build_mock_agg_result(
    groupby: GROUPBY_TYPE,
    groupby_params: Dict,
    raw_func: Callable,
    **raw_func_kw,
):
    """Run the aggregation on the groupby's mock data and return the result.

    Parameters
    ----------
    groupby
        Object whose ``op.build_mock_groupby()`` yields the mock groupby.
    groupby_params : dict
        Groupby parameters; only ``"as_index"`` is read here.
    raw_func
        Aggregation spec handed to ``aggregate``.
    **raw_func_kw
        Extra keyword arguments handed to ``aggregate``.

    Raises
    ------
    ValueError
        Re-raised from the mock aggregation when ``as_index`` is truthy or
        ``_support_get_group_without_as_index`` is set.
    """
    try:
        return groupby.op.build_mock_groupby().aggregate(raw_func, **raw_func_kw)
    except ValueError:
        retry_allowed = not (
            groupby_params.get("as_index") or _support_get_group_without_as_index
        )
        if not retry_allowed:  # pragma: no cover
            raise
        # Retry with as_index=True, convert to a frame and blank the index names.
        indexed_groupby = groupby.op.build_mock_groupby(as_index=True)
        agg_result = indexed_groupby.aggregate(raw_func, **raw_func_kw).to_frame()
        agg_result.index.names = [None] * agg_result.index.nlevels
        return agg_result
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing an aggregation applied to a groupby object.

    Calling the operator with a groupby evaluates the aggregation on mock data
    (see ``build_mock_agg_result``) to derive the output dtypes, index and
    number of index levels, and returns a new dataframe or series.
    """

    _op_type_ = opcodes.GROUPBY_AGG

    # aggregation spec as supplied by the caller, plus its keyword arguments
    raw_func = AnyField("raw_func")
    raw_func_kw = DictField("raw_func_kw")
    # NOTE(review): presumably filled in by normalize_reduction_funcs(); confirm
    func = AnyField("func")
    func_rename = ListField("func_rename", default=None)

    raw_groupby_params = DictField("raw_groupby_params")
    groupby_params = DictField("groupby_params")

    method = StringField("method")

    # for chunk
    chunk_store_limit = Int64Field("chunk_store_limit")
    pre_funcs = ListField("pre_funcs")
    agg_funcs = ListField("agg_funcs")
    post_funcs = ListField("post_funcs")
    index_levels = Int32Field("index_levels")
    size_recorder_name = StringField("size_recorder_name")

    def _set_inputs(self, inputs):
        """Re-bind entity entries of ``groupby_params["by"]`` to ``inputs[1:]``.

        The first input is the grouped data; the remaining ones replace, in
        order, every ``ENTITY_TYPE`` item of ``by``.
        """
        super()._set_inputs(inputs)
        inputs_iter = iter(self._inputs[1:])
        if len(self._inputs) > 1:
            self.groupby_params["by"] = [
                next(inputs_iter) if isinstance(v, ENTITY_TYPE) else v
                for v in self.groupby_params["by"]
            ]

    def _get_inputs(self, inputs):
        """Append every entity found in a list-typed ``by`` to ``inputs``.

        ``inputs`` is extended in place and returned.
        """
        by = self.groupby_params["by"]
        if isinstance(by, list):
            inputs.extend(v for v in by if isinstance(v, ENTITY_TYPE))
        return inputs

    def _get_index_levels(self, groupby, mock_index):
        """Return the number of index levels to group by in intermediate steps.

        With ``as_index`` set, this is the level count of ``mock_index``.
        Otherwise the mock aggregation is re-run with ``as_index=True`` and the
        level count of that result's index is used.
        """
        if self.groupby_params["as_index"]:
            return mock_index.nlevels

        try:
            as_index_agg_df = groupby.op.build_mock_groupby(as_index=True).aggregate(
                self.raw_func, **self.raw_func_kw
            )
        except Exception:  # pylint: disable=broad-except
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit are never turned into a level count.
            # handling cases like mdf.groupby("b", as_index=False).b.agg({"c": "count"})
            if isinstance(self.groupby_params["by"], list):
                return len(self.groupby_params["by"])
            raise  # pragma: no cover
        return as_index_agg_df.index.nlevels

    def _fix_as_index(self, result_index: pd.Index):
        """Force ``as_index`` to True when the mock result shows it had no effect.

        NOTE(review): ``agg()`` passes the groupby operator's own
        ``groupby_params`` dict, so this assignment is visible through that
        dict as well -- confirm that sharing is intended.
        """
        # A MultiIndex, or a single index that still carries a name, means
        # as_index=False did not take effect.
        if isinstance(result_index, pd.MultiIndex) or result_index.name is not None:
            self.groupby_params["as_index"] = True

    def _call_dataframe(self, groupby, input_df):
        """Build the dataframe output from the mock aggregation result."""
        compile_reduction_funcs(self, input_df)
        agg_df = build_mock_agg_result(
            groupby, self.groupby_params, self.raw_func, **self.raw_func_kw
        )

        # the row count is unknown until execution
        shape = (np.nan, agg_df.shape[1])
        if isinstance(agg_df.index, pd.RangeIndex):
            # RangeIndex(-1) stands for a RangeIndex of unknown size
            mock_index = pd.RangeIndex(-1)
        else:
            mock_index = agg_df.index
        index_value = parse_index(mock_index, groupby.key, groupby.index_value.key)

        # make sure if as_index=False takes effect
        self._fix_as_index(agg_df.index)

        # determine num of indices to group in intermediate steps
        self.index_levels = self._get_index_levels(groupby, agg_df.index)

        inputs = self._get_inputs([input_df])
        return self.new_dataframe(
            inputs,
            shape=shape,
            dtypes=agg_df.dtypes,
            index_value=index_value,
            columns_value=parse_index(agg_df.columns, store_data=True),
        )

    def _call_series(self, groupby, in_series):
        """Build the output when the groupby is series-like or the func is "size".

        The mock result decides whether a dataframe or a series is returned.
        """
        compile_reduction_funcs(self, in_series)
        agg_result = build_mock_agg_result(
            groupby, self.groupby_params, self.raw_func, **self.raw_func_kw
        )

        # make sure if as_index=False takes effect
        self._fix_as_index(agg_result.index)

        index_value = parse_index(
            agg_result.index, groupby.key, groupby.index_value.key
        )

        inputs = self._get_inputs([in_series])

        # determine num of indices to group in intermediate steps
        self.index_levels = self._get_index_levels(groupby, agg_result.index)

        # update value type
        if isinstance(agg_result, pd.DataFrame):
            return self.new_dataframe(
                inputs,
                shape=(np.nan, len(agg_result.columns)),
                dtypes=agg_result.dtypes,
                index_value=index_value,
                columns_value=parse_index(agg_result.columns, store_data=True),
            )
        return self.new_series(
            inputs,
            shape=(np.nan,),
            dtype=agg_result.dtype,
            name=agg_result.name,
            index_value=index_value,
        )

    def __call__(self, groupby):
        """Apply the operator to ``groupby`` and return the aggregated object."""
        normalize_reduction_funcs(self, ndim=groupby.ndim)

        # walk up the input chain to the dataframe / series being grouped
        df = groupby
        while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            df = df.inputs[0]

        if self.raw_func == "size":
            self.output_types = [OutputType.series]
        elif groupby.op.output_types[0] == OutputType.dataframe_groupby:
            self.output_types = [OutputType.dataframe]
        else:
            self.output_types = [OutputType.series]

        if self.output_types[0] == OutputType.dataframe:
            return self._call_dataframe(groupby, df)
        return self._call_series(groupby, df)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def agg(groupby, func=None, method="auto", *args, **kwargs):
    """
    Aggregate using one or more operations on grouped data.

    Parameters
    ----------
    groupby : MaxFrame Groupby
        Groupby data.
    func : str or list-like
        Aggregation functions.
    method : {'auto', 'shuffle', 'tree'}, default 'auto'
        'tree' method provides a better performance, 'shuffle' is recommended
        if aggregated result is very large, 'auto' will use 'shuffle' method
        in distributed mode and use 'tree' in local mode. ``None`` is treated
        as 'auto'.
    *args
        Only used when ``func`` is not an aggregation: forwarded to
        ``groupby.transform``.
    **kwargs
        Keyword arguments passed along with ``func``.

    Returns
    -------
    Series or DataFrame
        Aggregated result.

    Raises
    ------
    TypeError
        If ``groupby`` is not a groupby object.
    ValueError
        If ``method`` is not one of the supported values.
    """

    # When perform a computation on the grouped data, we won't shuffle
    # the data in the stage of groupby and do shuffle after aggregation.

    if not isinstance(groupby, GROUPBY_TYPE):
        raise TypeError(f"Input should be type of groupby, not {type(groupby)}")

    if method is None:
        method = "auto"
    if method not in ["shuffle", "tree", "auto"]:
        raise ValueError(
            f"Method {method} is not available, "
            "please specify 'tree', 'shuffle' or 'auto'"
        )

    if not is_funcs_aggregate(func, ndim=groupby.ndim):
        # pass index to transform, otherwise it will lose name info for index
        agg_result = build_mock_agg_result(
            groupby, groupby.op.groupby_params, func, **kwargs
        )
        mock_index = agg_result.index
        if isinstance(mock_index, pd.RangeIndex):
            # set -1 to represent unknown size for RangeIndex
            mock_index = pd.RangeIndex(-1)
        index_value = parse_index(mock_index, groupby.key, groupby.index_value.key)
        return groupby.transform(
            func, *args, _call_agg=True, index=index_value, **kwargs
        )

    # NOTE(review): raw_groupby_params and groupby_params receive the same dict
    # object as the groupby operator -- confirm that sharing is intended.
    agg_op = DataFrameGroupByAgg(
        raw_func=func,
        raw_func_kw=kwargs,
        method=method,
        raw_groupby_params=groupby.op.groupby_params,
        groupby_params=groupby.op.groupby_params,
    )
    return agg_op(groupby)
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes
|
|
19
|
+
from ...core import OutputType
|
|
20
|
+
from ...core.operator import OperatorLogicKeyGeneratorMixin
|
|
21
|
+
from ...serialization.serializables import (
|
|
22
|
+
AnyField,
|
|
23
|
+
BoolField,
|
|
24
|
+
DictField,
|
|
25
|
+
FunctionField,
|
|
26
|
+
StringField,
|
|
27
|
+
TupleField,
|
|
28
|
+
)
|
|
29
|
+
from ...utils import get_func_token, quiet_stdio, tokenize
|
|
30
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
31
|
+
from ..utils import make_dtype, make_dtypes, parse_index, validate_output_types
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class GroupByApplyLogicKeyGeneratorMixin(OperatorLogicKeyGeneratorMixin):
    """Logic-key mixin that adds the token of ``self.func`` to the token values."""

    def _get_logic_key_token_values(self):
        """Return the parent's token values, plus the func token when func is set."""
        base_tokens = super()._get_logic_key_token_values()
        if not self.func:  # pragma: no cover
            return base_tokens
        return base_tokens + [get_func_token(self.func)]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class GroupByApply(
    DataFrameOperator, DataFrameOperatorMixin, GroupByApplyLogicKeyGeneratorMixin
):
    """Operator applying a user function group-wise on a groupby object.

    Output type, dtypes and index are taken from the caller when supplied;
    otherwise they are inferred by running the function on a mock groupby.
    """

    _op_type_ = opcodes.APPLY
    _op_module_ = "dataframe.groupby"

    # user function with its positional / keyword arguments
    func = FunctionField("func")
    args = TupleField("args", default_factory=tuple)
    kwds = DictField("kwds", default_factory=dict)
    # set to True when the mock run returns at most two rows
    maybe_agg = BoolField("maybe_agg", default=None)
    logic_key = StringField("logic_key", default=None)
    func_key = AnyField("func_key", default=None)
    need_clean_up_func = BoolField("need_clean_up_func", default=False)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _update_key(self):
        """Recompute ``_key`` using the func's token in place of the func itself."""
        values = [v for v in self._values_ if v is not self.func] + [
            get_func_token(self.func)
        ]
        self._obj_set("_key", tokenize(type(self).__name__, *values))
        return self

    def _infer_df_func_returns(
        self, in_groupby, in_df, dtypes=None, dtype=None, name=None, index=None
    ):
        """Determine dtypes and index value of the apply result.

        Returns
        -------
        tuple
            ``(dtypes, index_value)``. For series output ``dtypes`` is a
            ``(name, dtype)`` pair, which is how ``__call__`` unpacks it.
            Either element may be None when it cannot be determined.
        """
        index_value, output_type, new_dtypes = None, None, None

        if self.output_types is not None and (dtypes is not None or dtype is not None):
            # The caller supplied everything needed; skip the mock run.
            # The pair must be ordered (name, dtype) to match __call__.
            ret_dtypes = dtypes if dtypes is not None else (name, dtype)
            ret_index_value = parse_index(index) if index is not None else None
            return ret_dtypes, ret_index_value

        try:
            infer_df = in_groupby.op.build_mock_groupby().apply(
                self.func, *self.args, **self.kwds
            )

            if len(infer_df) <= 2:
                # we create mock df with 4 rows, 2 groups
                # if return df has 2 rows, we assume that
                # it's an aggregation operation
                self.maybe_agg = True

            # todo return proper index when sort=True is implemented
            index_value = parse_index(infer_df.index[:0], in_df.key, self.func)

            # for backward compatibility
            dtype = dtype if dtype is not None else dtypes
            if isinstance(infer_df, pd.DataFrame):
                output_type = OutputType.dataframe
                new_dtypes = infer_df.dtypes
            elif isinstance(infer_df, pd.Series):
                output_type = OutputType.series
                # compare against None so falsy names such as 0 or "" survive
                new_dtypes = (
                    name if name is not None else infer_df.name,
                    dtype if dtype is not None else infer_df.dtype,
                )
            else:
                output_type = OutputType.series
                new_dtypes = (
                    name,
                    dtype if dtype is not None else pd.Series(infer_df).dtype,
                )
        except Exception:  # nosec  # pylint: disable=broad-except
            # Best effort: a failing mock run leaves the values undetermined and
            # __call__ reports what is missing. KeyboardInterrupt / SystemExit
            # are deliberately not caught.
            pass

        self.output_types = (
            [output_type]
            if not self.output_types and output_type
            else self.output_types
        )
        dtypes = new_dtypes if dtypes is None else dtypes
        index_value = index_value if index is None else parse_index(index)
        return dtypes, index_value

    def __call__(self, groupby, dtypes=None, dtype=None, name=None, index=None):
        """Create the output dataframe / series for ``groupby``.

        Raises
        ------
        TypeError
            If the output types or dtypes can neither be inferred nor were given.
        """
        in_df = groupby
        if self.output_types and self.output_types[0] == OutputType.df_or_series:
            return self.new_df_or_series([groupby])
        # walk up the input chain to the dataframe / series being grouped
        while in_df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            in_df = in_df.inputs[0]

        with quiet_stdio():
            dtypes, index_value = self._infer_df_func_returns(
                groupby, in_df, dtypes, dtype=dtype, name=name, index=index
            )
        if index_value is None:
            index_value = parse_index(None, (in_df.key, in_df.index_value.key))
        for arg, desc in zip((self.output_types, dtypes), ("output_types", "dtypes")):
            if arg is None:
                raise TypeError(
                    f"Cannot determine {desc} by calculating with enumerate data, "
                    "please specify it as arguments"
                )

        if self.output_types[0] == OutputType.dataframe:
            new_shape = (np.nan, len(dtypes))
            return self.new_dataframe(
                [groupby],
                shape=new_shape,
                dtypes=dtypes,
                index_value=index_value,
                columns_value=parse_index(dtypes.index, store_data=True),
            )

        # series output: dtypes is a (name, dtype) pair; explicit arguments win
        name = name if name is not None else dtypes[0]
        dtype = dtype if dtype is not None else dtypes[1]
        new_shape = (np.nan,)
        return self.new_series(
            [groupby],
            name=name,
            shape=new_shape,
            dtype=dtype,
            index_value=index_value,
        )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def groupby_apply(
    groupby,
    func,
    *args,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    skip_infer=None,
    **kwargs,
):
    """
    Call ``func`` on every group and combine the per-group results.

    ``func`` receives a dataframe as its first argument and returns a
    DataFrame, a Series or a scalar; the results are combined into a single
    dataframe or series.

    ``apply`` is very flexible, but it can be considerably slower than more
    specific methods such as ``agg`` or ``transform``, so prefer those when
    they fit the task.

    Parameters
    ----------
    func : callable
        Callable taking a dataframe as first argument and returning a
        dataframe, a series or a scalar. It may also take positional and
        keyword arguments.

    output_type : {'dataframe', 'series'}, default None
        Type of the returned object. See `Notes`.

    dtypes : Series, default None
        dtypes of a returned DataFrame. See `Notes`.

    dtype : numpy.dtype, default None
        dtype of a returned Series. See `Notes`.

    name : str, default None
        Name of a returned Series. See `Notes`.

    index : Index, default None
        Index of the returned object. See `Notes`.

    skip_infer : bool, default None
        When truthy and no output type is given, inference on mock data is
        skipped and the output is declared as "dataframe or series".

    args, kwargs : tuple and dict
        Optional positional and keyword arguments passed to ``func``. The
        keys ``output_types`` and ``object_type`` are removed from ``kwargs``
        and used to resolve the output type instead.

    Returns
    -------
    applied : Series or DataFrame

    See Also
    --------
    pipe : Apply function to the full GroupBy object instead of to each
        group.
    aggregate : Apply aggregate function to the GroupBy object.
    transform : Apply function column-by-column to the GroupBy object.
    Series.apply : Apply a function to a Series.
    DataFrame.apply : Apply a function to each row or column of a DataFrame.

    Notes
    -----
    To decide the dtypes and shape of the result, MaxFrame tries ``func`` on
    a mock grouped object, and that trial call may fail. In that case pass
    the kind of result (DataFrame or Series) as ``output_type``.

    * For DataFrame output, give a list or a pandas Series as ``dtypes``;
      ``index`` of the output may be given too.
    * For Series output, give ``dtype`` and ``name`` of the output Series.
    """
    # legacy spellings of the output-type argument travel inside **kwargs
    kw_output_types = kwargs.pop("output_types", None)
    kw_object_type = kwargs.pop("object_type", None)
    output_types = validate_output_types(
        output_types=kw_output_types,
        output_type=output_type,
        object_type=kw_object_type,
    )
    if output_types is None and skip_infer:
        output_types = [OutputType.df_or_series]

    normalized_dtypes = make_dtypes(dtypes)
    normalized_dtype = make_dtype(dtype)
    apply_op = GroupByApply(
        func=func, args=args, kwds=kwargs, output_types=output_types
    )
    return apply_op(
        groupby,
        dtypes=normalized_dtypes,
        dtype=normalized_dtype,
        name=name,
        index=index,
    )
|