maxframe 0.1.0b5__cp38-cp38-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +2 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import operator
|
|
17
|
+
from functools import reduce
|
|
18
|
+
from typing import NamedTuple
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import pytest
|
|
23
|
+
|
|
24
|
+
from .... import dataframe as md
|
|
25
|
+
from ....tensor import Tensor
|
|
26
|
+
from ...core import DataFrame, IndexValue, OutputType, Series
|
|
27
|
+
from ...datasource.dataframe import from_pandas as from_pandas_df
|
|
28
|
+
from ...datasource.series import from_pandas as from_pandas_series
|
|
29
|
+
from .. import (
|
|
30
|
+
CustomReduction,
|
|
31
|
+
DataFrameAll,
|
|
32
|
+
DataFrameAny,
|
|
33
|
+
DataFrameCount,
|
|
34
|
+
DataFrameCummax,
|
|
35
|
+
DataFrameCummin,
|
|
36
|
+
DataFrameCumprod,
|
|
37
|
+
DataFrameCumsum,
|
|
38
|
+
DataFrameKurtosis,
|
|
39
|
+
DataFrameMax,
|
|
40
|
+
DataFrameMean,
|
|
41
|
+
DataFrameMin,
|
|
42
|
+
DataFrameNunique,
|
|
43
|
+
DataFrameProd,
|
|
44
|
+
DataFrameSem,
|
|
45
|
+
DataFrameSkew,
|
|
46
|
+
DataFrameSum,
|
|
47
|
+
DataFrameVar,
|
|
48
|
+
)
|
|
49
|
+
from ..aggregation import where_function
|
|
50
|
+
from ..core import ReductionCompiler
|
|
51
|
+
|
|
52
|
+
pytestmark = pytest.mark.pd_compat
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class FunctionOptions(NamedTuple):
    """Keyword options that a reduction method under test accepts.

    The parametrized tests read these flags to decide which optional
    keyword arguments may be passed to the method being exercised.
    """

    # the method accepts a ``skipna`` keyword
    has_skipna: bool = True
    # the method accepts a ``numeric_only`` keyword
    has_numeric_only: bool = True
    # the method accepts a ``bool_only`` keyword
    has_bool_only: bool = False
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Option sets reused by several rows of the table below. NamedTuple instances
# are immutable, so sharing one object between rows is safe.
_DEFAULT_OPTS = FunctionOptions()
_BOOL_ONLY_OPTS = FunctionOptions(has_numeric_only=False, has_bool_only=True)

# (method name, expected operator class, keyword options the method accepts)
reduction_functions = [
    ("sum", DataFrameSum, _DEFAULT_OPTS),
    ("prod", DataFrameProd, _DEFAULT_OPTS),
    ("min", DataFrameMin, _DEFAULT_OPTS),
    ("max", DataFrameMax, _DEFAULT_OPTS),
    ("count", DataFrameCount, FunctionOptions(has_skipna=False)),
    ("mean", DataFrameMean, _DEFAULT_OPTS),
    ("var", DataFrameVar, _DEFAULT_OPTS),
    ("skew", DataFrameSkew, _DEFAULT_OPTS),
    ("kurt", DataFrameKurtosis, _DEFAULT_OPTS),
    ("sem", DataFrameSem, _DEFAULT_OPTS),
    ("all", DataFrameAll, _BOOL_ONLY_OPTS),
    ("any", DataFrameAny, _BOOL_ONLY_OPTS),
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@pytest.mark.parametrize("func_name,op,func_opts", reduction_functions)
def test_series_reduction(func_name, op, func_opts: FunctionOptions):
    """Reducing a series must give a shape-() tensor built by operator ``op``."""
    labels = list(map(str, range(20)))
    raw = pd.Series(range(20), index=labels)
    reduced = getattr(from_pandas_series(raw, chunk_size=3), func_name)()

    assert isinstance(reduced, Tensor)
    assert isinstance(reduced.op, op)
    assert reduced.shape == ()

    # Only pass axis / skipna to the reductions that declare skipna support.
    if func_opts.has_skipna:
        call_kwargs = {"axis": "index", "skipna": False}
    else:
        call_kwargs = {}
    raw = pd.Series(np.random.rand(25), name="a")
    reduce_method = getattr(from_pandas_series(raw, chunk_size=7), func_name)
    reduced = reduce_method(**call_kwargs)

    assert isinstance(reduced, Tensor)
    assert reduced.shape == ()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@pytest.mark.parametrize("func_name,op,func_opts", reduction_functions)
def test_dataframe_reduction(func_name, op, func_opts: FunctionOptions):
    """Reducing a DataFrame must give a series with one entry per kept label."""
    str_labels = list(map(str, range(20)))

    # two named columns, string row labels
    raw = pd.DataFrame(
        {"a": list(range(20)), "b": list(range(20, 0, -1))},
        index=str_labels,
    )
    reduced = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(reduced, Series)
    assert isinstance(reduced.op, op)
    assert isinstance(reduced.index_value._index_value, IndexValue.Index)
    assert reduced.shape == (2,)

    # default integer column labels
    raw = pd.DataFrame(np.random.rand(20, 10))
    reduced = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(reduced, Series)
    accepted_index_types = (IndexValue.RangeIndex, IndexValue.Int64Index)
    assert isinstance(reduced.index_value._index_value, accepted_index_types)
    assert reduced.shape == (10,)

    # reduce along the columns axis
    raw = pd.DataFrame(np.random.rand(20, 20), index=str_labels)
    reduce_method = getattr(from_pandas_df(raw, chunk_size=4), func_name)
    reduced = reduce_method(axis="columns")

    assert reduced.shape == (20,)

    # the ``level`` argument is expected to be rejected
    with pytest.raises(NotImplementedError):
        getattr(from_pandas_df(raw, chunk_size=3), func_name)(level=0, axis=1)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# (method name, expected operator class, keyword options the method accepts);
# every cumulative reduction listed here uses the default options.
cum_reduction_functions = [
    (method_name, op_cls, FunctionOptions())
    for method_name, op_cls in (
        ("cummin", DataFrameCummin),
        ("cummax", DataFrameCummax),
        ("cumprod", DataFrameCumprod),
        ("cumsum", DataFrameCumsum),
    )
]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@pytest.mark.parametrize("func_name,op,func_opts", cum_reduction_functions)
def test_cum_series_reduction(func_name, op, func_opts: FunctionOptions):
    """Cumulative reductions of a series must keep the input length."""
    # Build the fixture from the values directly. Passing a dict such as
    # ``{"a": [...]}`` together with ``index`` makes pandas align the key "a"
    # against the labels "0".."19", which yields an all-NaN series instead of
    # the intended integers 0..19.
    data = pd.Series(range(20), index=[str(i) for i in range(20)])
    series = getattr(from_pandas_series(data, chunk_size=3), func_name)()

    assert isinstance(series, Series)
    assert series.shape == (20,)

    data = pd.Series(np.random.rand(25), name="a")
    # Only pass axis / skipna to the reductions that declare skipna support.
    if func_opts.has_skipna:
        kwargs = dict(axis="index", skipna=False)
    else:
        kwargs = dict()
    series = getattr(from_pandas_series(data, chunk_size=7), func_name)(**kwargs)

    assert isinstance(series, Series)
    assert series.shape == (25,)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@pytest.mark.parametrize("func_name,op,func_opts", cum_reduction_functions)
def test_cum_dataframe_reduction(func_name, op, func_opts: FunctionOptions):
    """Cumulative reductions of a DataFrame must keep the input shape."""
    # two named columns, string row labels
    raw = pd.DataFrame(
        {"a": list(range(20)), "b": list(range(20, 0, -1))},
        index=list(map(str, range(20))),
    )
    accumulated = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(accumulated, DataFrame)
    assert isinstance(accumulated.index_value._index_value, IndexValue.Index)
    assert accumulated.shape == (20, 2)

    # default integer row labels
    raw = pd.DataFrame(np.random.rand(20, 10))
    accumulated = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(accumulated, DataFrame)
    row_index = accumulated.index_value._index_value
    assert isinstance(row_index, IndexValue.RangeIndex)
    assert accumulated.shape == (20, 10)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_nunique():
    """``nunique`` must build a series-typed DataFrameNunique op on both axes."""
    column_names = [f"c{i}" for i in range(10)]
    raw = pd.DataFrame(np.random.randint(0, 6, size=(20, 10)), columns=column_names)

    # default axis: one result entry per column
    per_column = from_pandas_df(raw, chunk_size=3).nunique()

    assert per_column.shape == (10,)
    assert per_column.op.output_types[0] == OutputType.series
    assert isinstance(per_column.op, DataFrameNunique)

    # axis=1: one result entry per row
    per_row = from_pandas_df(raw.copy(), chunk_size=3).nunique(axis=1)

    assert per_row.shape == (20,)
    assert per_row.op.output_types[0] == OutputType.series
    assert isinstance(per_row.op, DataFrameNunique)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_dataframe_aggregate():
    """Check shape, labels, output type and stored funcs of ``DataFrame.agg``."""
    raw = pd.DataFrame(np.random.rand(20, 19))
    n_rows, n_cols = raw.shape
    row_labels = list(range(n_rows))
    col_labels = list(range(n_cols))
    agg_funcs = [
        "sum",
        "min",
        "max",
        "mean",
        "var",
        "std",
        "all",
        "any",
        "skew",
        "kurt",
        "sem",
    ]

    # list of functions, no explicit chunk size
    frame = from_pandas_df(raw)
    aggregated = frame.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs), n_cols)
    assert list(aggregated.columns_value.to_pandas()) == list(range(19))
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    frame = from_pandas_df(raw, chunk_size=(3, 4))

    # single function name, default axis
    aggregated = frame.agg("sum")
    assert aggregated.shape == (n_cols,)
    assert list(aggregated.index_value.to_pandas()) == col_labels
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == ["sum"]

    # single function name, axis=1
    aggregated = frame.agg("sum", axis=1)
    assert aggregated.shape == (n_rows,)
    assert list(aggregated.index_value.to_pandas()) == row_labels
    assert aggregated.op.output_types[0] == OutputType.series

    aggregated = frame.agg("var", axis=1)
    assert aggregated.shape == (n_rows,)
    assert list(aggregated.index_value.to_pandas()) == row_labels
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == ["var"]

    # list of functions, default axis
    aggregated = frame.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs), n_cols)
    assert list(aggregated.columns_value.to_pandas()) == col_labels
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    # list of functions, axis=1: function names become the columns
    aggregated = frame.agg(agg_funcs, axis=1)
    assert aggregated.shape == (n_rows, len(agg_funcs))
    assert list(aggregated.columns_value.to_pandas()) == agg_funcs
    assert list(aggregated.index_value.to_pandas()) == row_labels
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    # per-column function mapping
    dict_fun = {0: "sum", 2: ["var", "max"], 9: ["mean", "var", "std"]}
    # normalise every mapping value to a list, then take the union of names
    func_lists = [[v] if isinstance(v, str) else v for v in dict_fun.values()]
    all_cols = set(reduce(operator.add, func_lists))
    aggregated = frame.agg(dict_fun)
    assert aggregated.shape == (len(all_cols), len(dict_fun))
    assert set(aggregated.columns_value.to_pandas()) == set(dict_fun.keys())
    assert set(aggregated.index_value.to_pandas()) == all_cols
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func[0] == [dict_fun[0]]
    assert aggregated.op.func[2] == dict_fun[2]

    # call forms that are expected to be rejected
    with pytest.raises(TypeError):
        frame.agg(sum_0="sum", mean_0="mean")
    with pytest.raises(NotImplementedError):
        frame.agg({0: ["sum", "min", "var"], 9: ["mean", "var", "std"]}, axis=1)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def test_series_aggregate():
    """Check shape, labels, output type and stored funcs of ``Series.agg``."""
    raw = pd.Series(
        np.random.rand(20), index=list(map(str, range(20))), name="a"
    )
    agg_funcs = [
        "sum",
        "min",
        "max",
        "mean",
        "var",
        "std",
        "all",
        "any",
        "skew",
        "kurt",
        "sem",
    ]
    expected_shape = (len(agg_funcs),)

    # list of functions, no explicit chunk size
    tileable = from_pandas_series(raw)

    aggregated = tileable.agg(agg_funcs)
    assert aggregated.shape == expected_shape
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == agg_funcs

    tileable = from_pandas_series(raw, chunk_size=3)

    # a single function name gives a scalar-typed result
    aggregated = tileable.agg("sum")
    assert aggregated.shape == ()
    assert aggregated.op.output_types[0] == OutputType.scalar

    aggregated = tileable.agg(agg_funcs)
    assert aggregated.shape == expected_shape
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == agg_funcs

    # keyword-style aggregation is expected to be rejected
    with pytest.raises(TypeError):
        tileable.agg(sum_0=(0, "sum"), mean_0=(0, "mean"))
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def test_compile_function():
    """Check which functions ReductionCompiler rejects and how it splits the rest.

    Accepted functions are compiled and the resulting ``pre_funcs``,
    ``agg_funcs`` and ``post_funcs`` lists are inspected.
    """
    compiler = ReductionCompiler()
    ms = md.Series([1, 2, 3])

    # (function, ndim) pairs that add_function must refuse with ValueError
    rejected_cases = [
        # no MaxFrame objects inside closures
        (functools.partial(lambda x: (x + ms).sum()), 2),
        # function should return a MaxFrame object
        (lambda x: x is not None, 2),
        # function should perform some sort of reduction in dimensionality
        (lambda x: x, 2),
        # function should only contain acceptable operators
        (lambda x: x.sort_values().max(), 1),
        (lambda x: x.max().shift(1), 2),
    ]
    for bad_func, bad_ndim in rejected_cases:
        with pytest.raises(ValueError):
            compiler.add_function(bad_func, ndim=bad_ndim)

    # test agg for all data
    for ndim in (1, 2):
        compiler = ReductionCompiler()
        compiler.add_function(lambda x: (x**2).count() + 1, ndim=ndim)
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        assert b"pow" in compiled.pre_funcs[0].func_idl
        # check agg_funcs
        assert len(compiled.agg_funcs) == 1
        first_agg = compiled.agg_funcs[0]
        assert first_agg.map_func_name == "count"
        assert first_agg.agg_func_name == "sum"
        # check post_funcs
        assert len(compiled.post_funcs) == 1
        first_post = compiled.post_funcs[0]
        assert first_post.func_name == "<lambda>"
        assert b"add" in first_post.func_idl

        # add a second function to the same compiler and compile again
        compiler.add_function(
            lambda x: -x.prod() ** 2 + (1 + (x**2).count()), ndim=ndim
        )
        compiled = compiler.compile()
        # check pre_funcs: exactly two, and "pow" is in some but not all of them
        assert len(compiled.pre_funcs) == 2
        has_pow = [b"pow" in pre.func_idl for pre in compiled.pre_funcs]
        assert any(has_pow)
        assert not all(has_pow)
        # check agg_funcs
        assert len(compiled.agg_funcs) == 2
        assert {agg.map_func_name for agg in compiled.agg_funcs} == {
            "count",
            "prod",
        }
        assert {agg.agg_func_name for agg in compiled.agg_funcs} == {
            "sum",
            "prod",
        }
        # check post_funcs
        assert len(compiled.post_funcs) == 2
        assert compiled.post_funcs[0].func_name == "<lambda_0>"
        assert b"add" in compiled.post_funcs[0].func_idl
        assert b"add" in compiled.post_funcs[1].func_idl

        compiler = ReductionCompiler()
        compiler.add_function(
            lambda x: where_function(x.all(), x.count(), 0), ndim=ndim
        )
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        only_pre = compiled.pre_funcs[0]
        assert only_pre.input_key == only_pre.output_key
        # check agg_funcs
        assert len(compiled.agg_funcs) == 2
        assert {agg.map_func_name for agg in compiled.agg_funcs} == {
            "all",
            "count",
        }
        assert {agg.agg_func_name for agg in compiled.agg_funcs} == {
            "sum",
            "all",
        }
        # check post_funcs
        assert len(compiled.post_funcs) == 1
        assert b"where" in compiled.post_funcs[0].func_idl

        # check boolean expressions
        compiler = ReductionCompiler()
        compiler.add_function(lambda x: (x == "1").sum(), ndim=ndim)
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        assert b"eq" in compiled.pre_funcs[0].func_idl
        # check agg_funcs
        assert len(compiled.agg_funcs) == 1
        first_agg = compiled.agg_funcs[0]
        assert first_agg.map_func_name == "sum"
        assert first_agg.agg_func_name == "sum"

    # test agg for specific columns
    compiler = ReductionCompiler()
    compiler.add_function(lambda x: 1 + x.sum(), ndim=2, cols=["a", "b"])
    compiler.add_function(lambda x: -1 + x.sum(), ndim=2, cols=["b", "c"])
    compiled = compiler.compile()
    # check pre_funcs
    assert len(compiled.pre_funcs) == 1
    assert set(compiled.pre_funcs[0].columns) == set("abc")
    # check agg_funcs
    assert len(compiled.agg_funcs) == 1
    first_agg = compiled.agg_funcs[0]
    assert first_agg.map_func_name == "sum"
    assert first_agg.agg_func_name == "sum"
    # check post_funcs
    assert len(compiled.post_funcs) == 2
    assert {"".join(sorted(post.columns)) for post in compiled.post_funcs} == {
        "ab",
        "bc",
    }

    # test agg for multiple columns
    compiler = ReductionCompiler()
    compiler.add_function(lambda x: x.sum(), ndim=2, cols=["a"])
    compiler.add_function(lambda x: x.sum(), ndim=2, cols=["b"])
    compiler.add_function(lambda x: x.min(), ndim=2, cols=["c"])
    compiled = compiler.compile()
    # check pre_funcs
    assert len(compiled.pre_funcs) == 1
    assert set(compiled.pre_funcs[0].columns) == set("abc")
    # check agg_funcs
    assert len(compiled.agg_funcs) == 2
    first_agg = compiled.agg_funcs[0]
    assert first_agg.map_func_name == "sum"
    assert first_agg.agg_func_name == "sum"
    # check post_funcs
    assert len(compiled.post_funcs) == 2
    assert set(compiled.post_funcs[0].columns) == set("ab")
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def test_custom_aggregation():
    """CustomReduction instances must compile into one "custom_reduction" agg."""

    class MockReduction1(CustomReduction):
        # only an agg step, producing a single value
        def agg(self, v1):
            return v1.sum()

    class MockReduction2(CustomReduction):
        # pre / agg / post steps that carry two intermediate values
        def pre(self, value):
            return value + 1, value**2

        def agg(self, v1, v2):
            return v1.sum(), v2.prod()

        def post(self, v1, v2):
            return v1 + v2

    # (reduction class, output_limit the compiled agg func must report)
    cases = ((MockReduction1, 1), (MockReduction2, 2))

    for ndim in (1, 2):
        for reduction_cls, expected_limit in cases:
            compiler = ReductionCompiler()
            compiler.add_function(reduction_cls(), ndim=ndim)
            compiled = compiler.compile()
            # check agg_funcs
            assert len(compiled.agg_funcs) == 1
            agg_func = compiled.agg_funcs[0]
            assert agg_func.map_func_name == "custom_reduction"
            assert agg_func.agg_func_name == "custom_reduction"
            assert isinstance(agg_func.custom_reduction, reduction_cls)
            assert agg_func.output_limit == expected_limit
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE, OutputType
|
|
21
|
+
from ...tensor.core import TensorOrder
|
|
22
|
+
from ...utils import lazy_import
|
|
23
|
+
from ..initializer import Series as asseries
|
|
24
|
+
from .core import CustomReduction, DataFrameReductionMixin, DataFrameReductionOperator
|
|
25
|
+
|
|
26
|
+
cudf = lazy_import("cudf")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class UniqueReduction(CustomReduction):
    """Custom reduction that deduplicates values in its agg and post steps."""

    def agg(self, data):  # noqa: W0221  # pylint: disable=arguments-differ
        # Pick the Series type of the library matching the execution
        # device, then wrap the unique values so the result is series data.
        if self.is_gpu():
            series_cls = cudf.Series
        else:
            series_cls = pd.Series
        return series_cls(data.unique())

    def post(self, data):  # noqa: W0221  # pylint: disable=arguments-differ
        # Deduplicate once more on the aggregated data.
        deduplicated = data.unique()
        return deduplicated
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
    """Operator behind ``unique``; its output is a 1-d tensor of unknown length."""

    _op_type_ = opcodes.UNIQUE
    _func_name = "unique"

    @classmethod
    def get_reduction_callable(cls, op):
        """Return the UniqueReduction configured from ``op``'s GPU flag."""
        on_gpu = op.is_gpu()
        return UniqueReduction(name=cls._func_name, is_gpu=on_gpu)

    def __call__(self, a):
        """Apply the operator to ``a`` and return the resulting tileable."""
        # Anything that is not already an entity is first wrapped as a series.
        source = a if isinstance(a, ENTITY_TYPE) else asseries(a)
        self.output_types = [OutputType.tensor]
        # The number of unique values is not known up front, hence the NaN
        # length in the declared shape.
        outputs = self.new_tileables(
            [source],
            shape=(np.nan,),
            dtype=source.dtype,
            order=TensorOrder.C_ORDER,
        )
        return outputs[0]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def unique(values, method="tree"):
    """
    Return the unique values in order of appearance. This does NOT sort.

    Parameters
    ----------
    values : 1d array-like
    method : 'shuffle' or 'tree'
        The 'tree' method provides better performance; 'shuffle' is
        recommended if the number of unique values is very large.

    See Also
    --------
    Index.unique
    Series.unique

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> import pandas as pd
    >>> md.unique(md.Series([2, 1, 3, 3])).execute()
    array([2, 1, 3])

    >>> md.unique(md.Series([2] + [1] * 5)).execute()
    array([2, 1])

    >>> md.unique(md.Series([pd.Timestamp('20160101'),
    ...                      pd.Timestamp('20160101')])).execute()
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> md.unique(md.Series([pd.Timestamp('20160101', tz='US/Eastern'),
    ...                      pd.Timestamp('20160101', tz='US/Eastern')])).execute()
    array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
          dtype=object)
    """
    # Build the operator for the requested method and apply it directly.
    return DataFrameUnique(method=method)(values)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ...serialization.serializables import Int32Field
|
|
18
|
+
from .core import DataFrameReductionMixin, DataFrameReductionOperator
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
    """Variance reduction operator."""

    _op_type_ = opcodes.VAR
    _func_name = "var"

    # delta degrees of freedom subtracted from the count in the divisor
    ddof = Int32Field("ddof", default=None)

    @classmethod
    def get_reduction_callable(cls, op: "DataFrameVar"):
        """Return a function computing the variance with ``op``'s settings.

        ``skipna`` and ``ddof`` are read from ``op`` once and captured by
        the returned closure.
        """
        skipna = op.skipna
        ddof = op.ddof

        def var(x):
            n_valid = x.count()
            if ddof == 0:
                # population variance: E[x^2] - E[x]^2
                mean_of_squares = (x**2).mean(skipna=skipna)
                squared_mean = (x.mean(skipna=skipna)) ** 2
                return mean_of_squares - squared_mean
            # general case: (sum(x^2) - sum(x)^2 / n) / (n - ddof)
            sum_of_squares = (x**2).sum(skipna=skipna)
            squared_sum = x.sum(skipna=skipna) ** 2
            numerator = sum_of_squares - squared_sum / n_valid
            return numerator / (n_valid - ddof)

        return var
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def var_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):
    """Build a scalar-output DataFrameVar operator and apply it to ``series``.

    All keyword arguments are forwarded to the operator unchanged.
    """
    op_kwargs = dict(
        axis=axis,
        skipna=skipna,
        level=level,
        ddof=ddof,
        output_types=[OutputType.scalar],
        method=method,
    )
    return DataFrameVar(**op_kwargs)(series)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def var_dataframe(
    df,
    axis=None,
    skipna=True,
    level=None,
    ddof=1,
    numeric_only=None,
    method=None,
):
    """Build a series-output DataFrameVar operator and apply it to ``df``.

    All keyword arguments are forwarded to the operator unchanged.
    """
    op_kwargs = dict(
        axis=axis,
        skipna=skipna,
        level=level,
        ddof=ddof,
        numeric_only=numeric_only,
        output_types=[OutputType.series],
        method=method,
    )
    return DataFrameVar(**op_kwargs)(df)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .sort_index import DataFrameSortIndex
|
|
16
|
+
from .sort_values import DataFrameSortValues
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _install():
    """Attach ``sort_values`` and ``sort_index`` to the DataFrame and Series types."""
    from ..core import DATAFRAME_TYPE, SERIES_TYPE
    from .sort_index import sort_index
    from .sort_values import dataframe_sort_values, series_sort_values

    # sort_index is shared; sort_values has one implementation per kind
    bindings = (
        (DATAFRAME_TYPE, dataframe_sort_values),
        (SERIES_TYPE, series_sort_values),
    )
    for target_classes, sort_values_impl in bindings:
        for cls in target_classes:
            cls.sort_values = sort_values_impl
            cls.sort_index = sort_index


# Run the installation once at import time, then drop the helper name.
_install()
del _install
|