maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from collections import namedtuple
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE, Entity, OutputType
|
|
21
|
+
from ...core.operator import MapReduceOperator
|
|
22
|
+
from ...serialization.serializables import AnyField, BoolField, Int32Field
|
|
23
|
+
from ...utils import lazy_import, no_default, pd_release_version
|
|
24
|
+
from ..core import SERIES_TYPE
|
|
25
|
+
from ..initializer import Series as asseries
|
|
26
|
+
from ..operators import DataFrameOperatorMixin
|
|
27
|
+
from ..utils import build_df, build_series, parse_index
|
|
28
|
+
|
|
29
|
+
cudf = lazy_import("cudf")
|
|
30
|
+
|
|
31
|
+
# True when the installed pandas release is at least 1.5.0.
# NOTE(review): presumably pandas 1.5 switched the ``group_keys`` default of
# ``groupby`` to a sentinel value -- confirm against the pandas changelog.
_GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)
|
|
32
|
+
# Default for the ``group_keys`` argument of ``groupby``: the ``no_default``
# sentinel when ``_GROUP_KEYS_NO_DEFAULT`` is set, otherwise ``True``.
_default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Lightweight (column, aggfunc) pair.
# NOTE(review): presumably mirrors ``pandas.NamedAgg`` for named aggregation -- confirm.
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
    """
    Operator producing a groupby tileable from a DataFrame or Series tileable.

    The grouping arguments are kept as serializable fields, and
    ``build_mock_groupby`` replays them against a small mock object so that
    other operators can inspect what a groupby call would return.
    """

    _op_type_ = opcodes.GROUPBY

    # grouping keys; an ``Entity`` is serialized through its ``data`` attribute
    by = AnyField(
        "by",
        default=None,
        on_serialize=lambda x: x.data if isinstance(x, Entity) else x,
    )
    level = AnyField("level", default=None)
    as_index = BoolField("as_index", default=None)
    sort = BoolField("sort", default=None)
    group_keys = BoolField("group_keys", default=None)

    shuffle_size = Int32Field("shuffle_size", default=None)

    def __init__(self, output_types=None, **kw):
        """
        Create the operator, normalizing ``output_types`` to a groupby kind.

        A leading ``dataframe`` / ``dataframe_groupby`` becomes
        ``[dataframe_groupby]`` and a leading ``series`` becomes
        ``[series_groupby]``; any other non-empty value is stored unchanged.
        """
        super().__init__(_output_types=output_types, **kw)
        if not output_types:
            return

        leading = output_types[0]
        if leading in (OutputType.dataframe, OutputType.dataframe_groupby):
            output_types = [OutputType.dataframe_groupby]
        elif leading == OutputType.series:
            output_types = [OutputType.series_groupby]
        self.output_types = output_types

    @property
    def is_dataframe_obj(self):
        """Whether the first output type is a DataFrame or DataFrame groupby."""
        leading = self.output_types[0]
        return leading in (OutputType.dataframe_groupby, OutputType.dataframe)

    @property
    def groupby_params(self):
        """Return a new dict holding the stored grouping arguments."""
        return {
            "by": self.by,
            "level": self.level,
            "as_index": self.as_index,
            "sort": self.sort,
            "group_keys": self.group_keys,
        }

    def build_mock_groupby(self, **kwargs):
        """
        Build a groupby over a mock object derived from the first input.

        Parameters
        ----------
        **kwargs
            Overrides applied on top of ``groupby_params`` before the mock
            object's ``groupby`` method is called.

        Returns
        -------
        The result of calling ``groupby`` on the mock object.
        """
        source = self.inputs[0]
        if self.is_dataframe_obj:
            mock_obj = build_df(
                source, size=[2, 2], fill_value=[1, 2], ensure_string=True
            )
        else:
            mock_obj = build_series(
                source,
                size=[2, 2],
                fill_value=[1, 2],
                name=source.name,
                ensure_string=True,
            )

        def _mock_key(key):
            # Non-entity keys (labels, callables, ...) are passed through as is.
            if not isinstance(key, ENTITY_TYPE):
                return key
            builder = build_df if key.ndim == 2 else build_series
            # Two pieces filled with 1 and 2 respectively, stacked together.
            pieces = [
                builder(key, size=2, fill_value=1, name=key.name),
                builder(key, size=2, fill_value=2, name=key.name),
            ]
            return pd.concat(pieces)

        call_kw = {**self.groupby_params, **kwargs}
        if call_kw.get("level"):
            # NOTE(review): any truthy level is collapsed to 0, presumably
            # because the mock object carries a single index level -- confirm.
            call_kw["level"] = 0
        if isinstance(call_kw["by"], list):
            call_kw["by"] = [_mock_key(key) for key in call_kw["by"]]
        return mock_obj.groupby(**call_kw)

    def _set_inputs(self, inputs):
        """
        Register ``inputs`` and re-bind Series-typed keys in ``by``.

        When more than one input is given, every ``SERIES_TYPE`` item of
        ``by`` is replaced, in order, by the next input after the first one.
        """
        super()._set_inputs(inputs)
        extra_inputs = iter(self._inputs[1:])
        if len(inputs) <= 1:
            return
        self.by = [
            next(extra_inputs) if isinstance(key, SERIES_TYPE) else key
            for key in self.by
        ]

    def __call__(self, df):
        """
        Create the groupby tileable for ``df``.

        For a 2-d ``df`` with a list ``by``, the dtype of each key is recorded
        in ``key_dtypes``; Series-typed keys are added as extra inputs.

        Raises
        ------
        KeyError
            If a non-Series key of a 2-d ``df`` is not found in ``df.dtypes``.
        """
        params = df.params.copy()
        params["index_value"] = parse_index(None, df.key, df.index_value.key)

        if df.ndim == 2 and isinstance(self.by, list):
            key_names, key_types = [], []
            for key in self.by:
                if isinstance(key, SERIES_TYPE):
                    key_name, key_type = key.name, key.dtype
                elif key in df.dtypes:
                    key_name, key_type = key, df.dtypes[key]
                else:
                    raise KeyError(key)
                key_names.append(key_name)
                key_types.append(key_type)
            params["key_dtypes"] = pd.Series(key_types, index=key_names)

        inputs = [df]
        if isinstance(self.by, list):
            inputs.extend(key for key in self.by if isinstance(key, SERIES_TYPE))

        return self.new_tileable(inputs, **params)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def groupby(
    df, by=None, level=None, as_index=True, sort=True, group_keys=_default_group_keys
):
    """
    Group a DataFrame or Series tileable.

    Parameters
    ----------
    df
        Tileable to group.
    by
        Grouping key(s). A single Series (pandas or tileable) is wrapped into
        a one-element list; for data with more than one dimension any other
        non-list, non-None value is wrapped as well.
    level
        Stored on the operator unchanged.
    as_index : bool, default True
        Stored on the operator; ``False`` is rejected for Series input.
    sort : bool, default True
        Stored on the operator unchanged.
    group_keys
        Stored on the operator; the ``no_default`` sentinel is stored as
        ``None``.

    Returns
    -------
    The groupby tileable produced by ``DataFrameGroupByOperator``.

    Raises
    ------
    TypeError
        If ``as_index`` is false and ``df`` is a Series.
    """
    if not as_index and df.op.output_types[0] == OutputType.series:
        raise TypeError("as_index=False only valid with DataFrame")

    if df.ndim == 2:
        output_types = [OutputType.dataframe_groupby]
    else:
        output_types = [OutputType.series_groupby]

    if isinstance(by, pd.Series):
        # raw pandas Series are converted to tileables first
        by = [asseries(by)]
    elif isinstance(by, SERIES_TYPE):
        by = [by]
    elif df.ndim > 1 and by is not None and not isinstance(by, list):
        by = [by]

    stored_group_keys = None if group_keys is no_default else group_keys
    op = DataFrameGroupByOperator(
        by=by,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=stored_group_keys,
        output_types=output_types,
    )
    return op(df)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import AnyField, BoolField
|
|
20
|
+
from ...utils import lazy_import
|
|
21
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
+
from ..utils import parse_index, validate_axis
|
|
23
|
+
|
|
24
|
+
cudf = lazy_import("cudf")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class GroupByCumReductionOperator(DataFrameOperatorMixin, DataFrameOperator):
    """
    Base operator for cumulative functions applied to a groupby tileable.

    Subclasses supply ``_func_name``, the name of the groupby method that is
    called on a mock groupby to work out the result's type and dtypes.
    """

    _op_module_ = "dataframe.groupby"

    axis = AnyField("axis", default=None)
    ascending = BoolField("ascending", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _calc_out_dtypes(self, in_groupby):
        """
        Run the cumulative function on a mock groupby and record the outcome.

        Sets ``self.output_types`` as a side effect.

        Returns
        -------
        The result's ``dtypes`` when the mock result is a pandas DataFrame,
        otherwise a ``(name, dtype)`` tuple.
        """
        mock_groupby = in_groupby.op.build_mock_groupby()
        func_name = self._func_name

        if func_name == "cumcount":
            # cumcount takes ``ascending`` instead of ``axis``
            mock_result = mock_groupby.cumcount(ascending=self.ascending)
        else:
            mock_result = getattr(mock_groupby, func_name)(axis=self.axis)

        if not isinstance(mock_result, pd.DataFrame):
            self.output_types = [OutputType.series]
            return mock_result.name, mock_result.dtype

        self.output_types = [OutputType.dataframe]
        return mock_result.dtypes

    def __call__(self, groupby):
        """
        Create the result tileable for ``groupby``.

        The chain of inputs is walked back to the first DataFrame / Series
        tileable, whose params form the basis of the result's params.
        """
        origin = groupby
        plain_types = (OutputType.dataframe, OutputType.series)
        while origin.op.output_types[0] not in plain_types:
            origin = origin.inputs[0]

        self.axis = validate_axis(self.axis or 0, origin)

        out_dtypes = self._calc_out_dtypes(groupby)

        kw = origin.params.copy()
        kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
        if self.output_types[0] == OutputType.dataframe:
            kw["columns_value"] = parse_index(out_dtypes.index, store_data=True)
            kw["dtypes"] = out_dtypes
            kw["shape"] = (groupby.shape[0], len(out_dtypes))
        else:
            name, dtype = out_dtypes
            kw["dtype"] = dtype
            kw["name"] = name
            kw["shape"] = (groupby.shape[0],)
        return self.new_tileable([groupby], **kw)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GroupByCummin(GroupByCumReductionOperator):
    """Cumulative-reduction operator that calls ``cummin`` on the groupby."""

    _op_type_ = opcodes.CUMMIN
    _func_name = "cummin"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class GroupByCummax(GroupByCumReductionOperator):
    """Cumulative-reduction operator that calls ``cummax`` on the groupby."""

    _op_type_ = opcodes.CUMMAX
    _func_name = "cummax"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class GroupByCumsum(GroupByCumReductionOperator):
    """Cumulative-reduction operator that calls ``cumsum`` on the groupby."""

    _op_type_ = opcodes.CUMSUM
    _func_name = "cumsum"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class GroupByCumprod(GroupByCumReductionOperator):
    """Cumulative-reduction operator that calls ``cumprod`` on the groupby."""

    _op_type_ = opcodes.CUMPROD
    _func_name = "cumprod"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class GroupByCumcount(GroupByCumReductionOperator):
    """Cumulative-reduction operator that calls ``cumcount`` on the groupby."""

    _op_type_ = opcodes.CUMCOUNT
    _func_name = "cumcount"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def cumcount(groupby, ascending: bool = True):
    """
    Apply ``GroupByCumcount`` to ``groupby``.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    ascending : bool, default True
        Stored on the operator and forwarded to the ``cumcount`` call.
    """
    return GroupByCumcount(ascending=ascending)(groupby)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def cummin(groupby, axis=0):
    """
    Apply ``GroupByCummin`` to ``groupby``.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    axis : default 0
        Stored on the operator as its ``axis`` field.
    """
    return GroupByCummin(axis=axis)(groupby)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def cummax(groupby, axis=0):
    """
    Apply ``GroupByCummax`` to ``groupby``.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    axis : default 0
        Stored on the operator as its ``axis`` field.
    """
    return GroupByCummax(axis=axis)(groupby)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def cumprod(groupby, axis=0):
    """
    Apply ``GroupByCumprod`` to ``groupby``.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    axis : default 0
        Stored on the operator as its ``axis`` field.
    """
    return GroupByCumprod(axis=axis)(groupby)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def cumsum(groupby, axis=0):
    """
    Apply ``GroupByCumsum`` to ``groupby``.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    axis : default 0
        Stored on the operator as its ``axis`` field.
    """
    return GroupByCumsum(axis=axis)(groupby)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import AnyField, DictField, Int64Field, StringField
|
|
20
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
|
+
from ..utils import parse_index
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GroupByFillOperator(DataFrameOperator, DataFrameOperatorMixin):
    """
    Base operator for fill-style functions applied to a groupby tileable.

    Subclasses supply ``_func_name``, the name of the groupby method that is
    called on a mock groupby to work out the result's type and dtypes.
    """

    _op_module_ = "dataframe.groupby"

    value = AnyField("value", default=None)
    method = StringField("method", default=None)
    axis = AnyField("axis", default=0)
    limit = Int64Field("limit", default=None)
    downcast = DictField("downcast", default=None)

    def _calc_out_dtypes(self, in_groupby):
        """
        Run the fill function on a mock groupby and record the outcome.

        Sets ``self.output_types`` as a side effect.

        Returns
        -------
        The result's ``dtypes`` when the mock result is a pandas DataFrame,
        otherwise a ``(name, dtype)`` tuple.
        """
        mock_groupby = in_groupby.op.build_mock_groupby()
        func_name = self._func_name

        if func_name != "fillna":
            # every function other than fillna is called with ``limit`` only
            mock_result = getattr(mock_groupby, func_name)(limit=self.limit)
        else:
            mock_result = mock_groupby.fillna(
                value=self.value,
                method=self.method,
                axis=self.axis,
                limit=self.limit,
                downcast=self.downcast,
            )

        if not isinstance(mock_result, pd.DataFrame):
            self.output_types = [OutputType.series]
            return mock_result.name, mock_result.dtype

        self.output_types = [OutputType.dataframe]
        return mock_result.dtypes

    def __call__(self, groupby):
        """
        Create the result tileable for ``groupby``.

        The chain of inputs is walked back to the first DataFrame / Series
        tileable, whose params form the basis of the result's params.
        """
        origin = groupby
        plain_types = (OutputType.dataframe, OutputType.series)
        while origin.op.output_types[0] not in plain_types:
            origin = origin.inputs[0]
        out_dtypes = self._calc_out_dtypes(groupby)

        kw = origin.params.copy()
        kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
        if self.output_types[0] == OutputType.dataframe:
            kw["columns_value"] = parse_index(out_dtypes.index, store_data=True)
            kw["dtypes"] = out_dtypes
            kw["shape"] = (groupby.shape[0], len(out_dtypes))
        else:
            name, dtype = out_dtypes
            kw["dtype"] = dtype
            kw["name"] = name
            kw["shape"] = (groupby.shape[0],)
        return self.new_tileable([groupby], **kw)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GroupByFFill(GroupByFillOperator):
    """Fill operator that calls ``ffill`` on the groupby."""

    _op_type_ = opcodes.FILL_NA
    _func_name = "ffill"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class GroupByBFill(GroupByFillOperator):
    """Fill operator that calls ``bfill`` on the groupby."""

    # Every other operator in this package declares its opcode as
    # ``_op_type_`` (with a trailing underscore); the previous ``_op_type``
    # spelling left this class without one.
    _op_type_ = opcodes.FILL_NA
    _func_name = "bfill"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class GroupByFillNa(GroupByFillOperator):
    """Fill operator that calls ``fillna`` on the groupby."""

    # Every other operator in this package declares its opcode as
    # ``_op_type_`` (with a trailing underscore); the previous ``_op_type``
    # spelling left this class without one.
    _op_type_ = opcodes.FILL_NA
    _func_name = "fillna"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def ffill(groupby, limit=None):
    """
    Forward fill the values within each group.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    limit : int, default None
        Limit number of values to fill.

    Returns
    -------
    Series or DataFrame
    """
    return GroupByFFill(limit=limit)(groupby)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def bfill(groupby, limit=None):
    """
    Backward fill the values within each group.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    limit : int, default None
        Limit number of values to fill.

    Returns
    -------
    Series or DataFrame
    """
    return GroupByBFill(limit=limit)(groupby)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def fillna(groupby, value=None, method=None, axis=None, limit=None, downcast=None):
    """
    Fill NA/NaN values within each group using the specified method.

    Parameters
    ----------
    groupby
        Groupby tileable to operate on.
    value : scalar, dict, Series, or DataFrame
        Value to use to fill holes (e.g. 0), alternately a
        dict/Series/DataFrame of values specifying which value to use for
        each index (for a Series) or column (for a DataFrame). Values not in
        the dict/Series/DataFrame will not be filled. This value cannot be a
        list.
    method : {'backfill', 'bfill', 'ffill', None}, default None
        Fill method.
    axis : {0 or 'index', 1 or 'column'}
        Axis along which to fill.
    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill.
    downcast : dict, default None
        A dict of item->dtype of what to downcast if possible, or the string
        'infer' which will try to downcast to an appropriate equal type.

    Returns
    -------
    Series or DataFrame
        The tileable created by ``GroupByFillNa``.
    """
    fill_op = GroupByFillNa(
        value=value,
        method=method,
        axis=axis,
        limit=limit,
        downcast=downcast,
    )
    return fill_op(groupby)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from collections.abc import Iterable
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import AnyField
|
|
20
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
|
+
from ..utils import parse_index
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GroupByIndex(DataFrameOperatorMixin, DataFrameOperator):
    """
    Operator selecting one or several columns from a groupby tileable.

    The result is again a groupby tileable: a Series groupby when the
    selection yields a 1-d object, a DataFrame groupby otherwise.
    """

    _op_type_ = opcodes.INDEX
    _op_module_ = "dataframe.groupby"

    selection = AnyField("selection", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @property
    def groupby_params(self):
        """Grouping arguments of the input operator plus ``selection``."""
        upstream_params = self.inputs[0].op.groupby_params
        upstream_params["selection"] = self.selection
        return upstream_params

    def build_mock_groupby(self, **kwargs):
        """Build the input operator's mock groupby and apply the selection."""
        upstream_mock = self.inputs[0].op.build_mock_groupby(**kwargs)
        return upstream_mock[self.selection]

    def __call__(self, groupby):
        """
        Create the groupby tileable restricted to ``self.selection``.

        The selection is applied to a mock groupby first to decide whether
        the result is one-dimensional; ``self.output_types`` is set from that.
        """
        picked = self.selection
        mock_indexed = groupby.op.build_mock_groupby()[picked]

        if mock_indexed.ndim != 1:
            self.output_types = [OutputType.dataframe_groupby]

            # a string is iterable but names a single column
            if isinstance(picked, Iterable) and not isinstance(picked, str):
                columns = list(picked)
            else:
                columns = [picked]

            params = groupby.params.copy()
            params["dtypes"] = selected_dtypes = groupby.dtypes[columns]
            params["selection"] = picked
            params["shape"] = (groupby.shape[0], len(columns))
            params["columns_value"] = parse_index(
                selected_dtypes.index, store_data=True
            )
        else:
            self.output_types = [OutputType.series_groupby]
            params = {
                "shape": (groupby.shape[0],),
                "name": picked,
                "dtype": groupby.dtypes[picked],
                "index_value": groupby.index_value,
                "key_dtypes": groupby.key_dtypes,
            }

        return self.new_tileable([groupby], **params)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def df_groupby_getitem(df_groupby, item):
    """
    Select column(s) ``item`` from a DataFrame groupby tileable.

    Parameters
    ----------
    df_groupby
        Groupby tileable exposing ``dtypes`` and ``selection``.
    item
        A single column label, or an iterable of column labels.

    Returns
    -------
    The groupby tileable created by ``GroupByIndex``.

    Raises
    ------
    NameError
        If ``item`` is neither a known column nor an iterable of known columns.
    IndexError
        If ``df_groupby`` already carries a selection.
    """
    try:
        hash(item)
    except TypeError:
        is_hashable = False
    else:
        is_hashable = True

    if is_hashable and item in df_groupby.dtypes:
        # a single known label selects one column
        output_types = [OutputType.series_groupby]
    elif isinstance(item, Iterable) and all(
        label in df_groupby.dtypes for label in item
    ):
        output_types = [OutputType.dataframe_groupby]
    else:
        raise NameError(f"Cannot slice groupby with {item!r}")

    if df_groupby.selection:
        raise IndexError(f"Column(s) {df_groupby.selection!r} already selected")

    index_op = GroupByIndex(selection=item, output_types=output_types)
    return index_op(df_groupby)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes
|
|
19
|
+
from ...core import OutputType
|
|
20
|
+
from ...serialization.serializables import BoolField, DictField, Int64Field
|
|
21
|
+
from ...utils import pd_release_version
|
|
22
|
+
from ..core import IndexValue
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import parse_index
|
|
25
|
+
|
|
26
|
+
# True when the installed pandas release is at least 1.4.0; used as the
# default of ``GroupByHead.enable_negative``.
# NOTE(review): presumably pandas 1.4 added negative ``n`` support to
# groupby ``head`` -- confirm against the pandas changelog.
_pandas_enable_negative = pd_release_version >= (1, 4, 0)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class GroupByHead(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator returning the first ``row_count`` rows of every group.

    The operator takes the original DataFrame / Series as its input and
    carries the grouping arguments in ``groupby_params``.
    """

    _op_type_ = opcodes.GROUPBY_HEAD
    _op_module_ = "dataframe.groupby"

    row_count = Int64Field("row_count", default=5)
    # NOTE(review): the default dict instance is created once at class
    # definition time; confirm that the field machinery never hands this
    # shared object out for mutation.
    groupby_params = DictField("groupby_params", default=dict())
    enable_negative = BoolField("enable_negative", default=_pandas_enable_negative)

    def __call__(self, groupby):
        """
        Create the result tileable for ``groupby``.

        Parameters
        ----------
        groupby
            Groupby tileable; its chain of inputs is walked back to the first
            DataFrame / Series tileable, which becomes this operator's input.

        Returns
        -------
        A tileable with an unknown (NaN) number of rows whose remaining
        params come from the original data, or from its selected columns when
        the groupby carries a selection.
        """
        df = groupby
        while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            df = df.inputs[0]

        # Read without popping so a dict owned by the upstream operator is
        # never modified.
        selection = groupby.op.groupby_params.get("selection")
        if df.ndim > 1 and selection:
            # a tuple that is not itself a column label is a list of columns
            if isinstance(selection, tuple) and selection not in df.dtypes:
                selection = list(selection)

            result_df = df[selection]
        else:
            result_df = df

        self._output_types = (
            [OutputType.dataframe] if result_df.ndim == 2 else [OutputType.series]
        )

        # Work on a copy, as the sibling groupby operators do, so the source
        # tileable's params are left untouched.
        params = result_df.params.copy()
        params["shape"] = (np.nan,) + result_df.shape[1:]
        if isinstance(df.index_value.value, IndexValue.RangeIndex):
            params["index_value"] = parse_index(pd.RangeIndex(-1), df.key)

        return self.new_tileable([df], **params)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def head(groupby, n=5):
    """
    Return first n rows of each group.

    Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows
    from the original Series or DataFrame with original index and order
    preserved (``as_index`` flag is ignored).

    Does not work for negative values of `n`.

    NOTE(review): the sentence above and the ``head(-1)`` example below are
    kept from the original documentation, but the operator is created with
    ``enable_negative`` derived from the local pandas version (>= 1.4.0), so
    the actual result for negative `n` may differ -- confirm.

    Parameters
    ----------
    groupby
        Groupby tileable to take rows from.
    n : int, default 5
        Number of rows to keep per group.

    Returns
    -------
    Series or DataFrame

    See Also
    --------
    Series.groupby
    DataFrame.groupby

    Examples
    --------

    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[1, 2], [1, 4], [5, 6]],
    ...                   columns=['A', 'B'])
    >>> df.groupby('A').head(1).execute()
       A  B
    0  1  2
    2  5  6
    >>> df.groupby('A').head(-1).execute()
    Empty DataFrame
    Columns: [A, B]
    Index: []
    """
    # ``as_index`` is dropped from the stored grouping arguments.
    stored_params = groupby.op.groupby_params.copy()
    stored_params.pop("as_index", None)

    head_op = GroupByHead(
        row_count=n,
        groupby_params=stored_params,
        enable_negative=_pandas_enable_negative,
    )
    return head_op(groupby)
|