maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import List, Optional, Union
|
|
19
|
+
|
|
20
|
+
from odps.models import Table as ODPSTable
|
|
21
|
+
|
|
22
|
+
from ... import opcodes
|
|
23
|
+
from ...config import options
|
|
24
|
+
from ...core import OutputType
|
|
25
|
+
from ...serialization.serializables import (
|
|
26
|
+
BoolField,
|
|
27
|
+
FieldTypes,
|
|
28
|
+
Int64Field,
|
|
29
|
+
ListField,
|
|
30
|
+
SeriesField,
|
|
31
|
+
StringField,
|
|
32
|
+
)
|
|
33
|
+
from ...typing_ import TileableType
|
|
34
|
+
from ..core import DataFrame # noqa: F401
|
|
35
|
+
from ..utils import parse_index
|
|
36
|
+
from .core import DataFrameDataStore
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DataFrameToODPSTable(DataFrameDataStore):
    """Operator describing a write of a DataFrame into a MaxCompute (ODPS) table.

    The serializable fields carry the write options collected by
    ``to_odps_table``. Calling the operator on a tileable produces an empty
    stub DataFrame whose only input is that tileable.
    """

    _op_type_ = opcodes.TO_ODPS_TABLE

    # dtypes of the DataFrame to be written
    dtypes = SeriesField("dtypes")

    # target table and partitioning options
    table_name = StringField("table_name", default=None)
    partition_spec = StringField("partition_spec", default=None)
    partition_columns = ListField("partition_columns", FieldTypes.string, default=None)
    # write behaviour options
    overwrite = BoolField("overwrite", default=None)
    write_batch_size = Int64Field("write_batch_size", default=None)
    unknown_as_string = BoolField("unknown_as_string", default=None)
    # index handling options
    index = BoolField("index", default=True)
    index_label = ListField("index_label", FieldTypes.string, default=None)
    # lifecycle of the output table
    lifecycle = Int64Field("lifecycle", default=None)

    def __init__(self, **kw):
        super().__init__(_output_types=[OutputType.dataframe], **kw)

    def __call__(self, x):
        """Build the stub DataFrame that represents the write of ``x``.

        The stub has a zero-length shape with the same number of dimensions
        as ``x``, and index / columns / dtypes sliced down to zero elements.
        """
        empty_shape = (0,) * len(x.shape)
        empty_index = parse_index(x.index_value.to_pandas()[:0], x.key, "index")
        empty_columns = parse_index(
            x.columns_value.to_pandas()[:0], x.key, "columns", store_data=True
        )
        stub_meta = {
            "shape": empty_shape,
            "dtypes": x.dtypes[:0],
            "index_value": empty_index,
            "columns_value": empty_columns,
        }
        return self.new_dataframe([x], **stub_meta)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def to_odps_table(
    df: TileableType,
    table: Union[ODPSTable, str],
    partition: Optional[str] = None,
    partition_col: Union[None, str, List[str]] = None,
    overwrite: bool = False,
    unknown_as_string: Optional[bool] = None,
    index: bool = True,
    index_label: Union[None, str, List[str]] = None,
    lifecycle: Optional[int] = None,
):
    """
    Write DataFrame object into a MaxCompute (ODPS) table.

    You need to provide the name of the table to write to. If you want to store
    data into a specific partition of a table, argument `partition` can be used.
    You can also use `partition_col` to specify DataFrame columns as partition
    columns, and data in the DataFrame will be grouped by these columns and
    inserted into the partitions given by the values of these columns.

    If the table does not exist, `to_odps_table` will create one.

    Column names for indexes are determined by the `index_label` argument. If
    the argument is absent, names of the levels are used if they are not None,
    or default names will be used. The default name for indexes with only one
    level will be `index`, and for indexes with multiple levels, the name will
    be `level_x` where x is the index of the level.

    Parameters
    ----------
    df: TileableType
        DataFrame object to write.
    table: Union[ODPSTable, str]
        Name of the table to write DataFrame into. An ODPS table object is
        also accepted, in which case its full table name is used.
    partition: Optional[str]
        Spec of the partition to write to, can be 'pt1=xxx,pt2=yyy'
    partition_col: Union[None, str, List[str]]
        Name of columns in DataFrame as partition columns. A single string
        is treated as a one-element list.
    overwrite: bool
        Overwrite data if the table / partition already exists.
    unknown_as_string: bool
        If True, object type in the DataFrame will be treated as strings.
        Otherwise errors might be raised.
    index: bool
        If True, indexes will be stored. Otherwise they are ignored.
    index_label: Union[None, str, List[str]]
        Specify column names for index levels. If absent, level names or default
        names will be used. A single string is treated as a one-element list.
    lifecycle: Optional[int]
        Specify lifecycle of the output table. When not given (or falsy),
        ``options.session.table_lifecycle`` is used instead.

    Returns
    -------
    result: DataFrame
        Stub DataFrame for execution.

    Raises
    ------
    ValueError
        If `index_label` is given and its length differs from the number of
        index levels of `df`.

    Notes
    -----
    `to_odps_table` returns a stub object for execution. The result returned is
    not reusable.
    """
    if isinstance(table, ODPSTable):
        table = table.full_table_name

    # normalize scalar arguments into single-element lists
    if isinstance(index_label, str):
        index_label = [index_label]
    if isinstance(partition_col, str):
        partition_col = [partition_col]

    if index_label:
        # one label is required per index level; the count is an int already,
        # so it must not be wrapped in len() when formatting the message
        expected_labels = len(df.index.names)
        if len(index_label) != expected_labels:
            raise ValueError(
                f"index_label needs {expected_labels} labels "
                f"but it only have {len(index_label)}"
            )

    op = DataFrameToODPSTable(
        dtypes=df.dtypes,
        table_name=table,
        unknown_as_string=unknown_as_string,
        partition_spec=partition,
        partition_columns=partition_col,
        overwrite=overwrite,
        index=index,
        index_label=index_label,
        lifecycle=lifecycle or options.session.table_lifecycle,
    )
    return op(df)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ..misc.accessor import CachedAccessor
|
|
16
|
+
from .accessor import (
|
|
17
|
+
DataFrameMaxFrameAccessor,
|
|
18
|
+
IndexMaxFrameAccessor,
|
|
19
|
+
SeriesMaxFrameAccessor,
|
|
20
|
+
)
|
|
21
|
+
from .reshuffle import DataFrameReshuffle, df_reshuffle
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _install():
    """Register extension functions and attach the ``mf`` accessor.

    ``reshuffle`` is registered on the DataFrame accessor first; afterwards
    every accessor class that has at least one registered function is exposed
    as the ``mf`` attribute on its corresponding tileable types.
    """
    from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE

    DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)

    accessor_targets = (
        (DataFrameMaxFrameAccessor, DATAFRAME_TYPE),
        (SeriesMaxFrameAccessor, SERIES_TYPE),
        (IndexMaxFrameAccessor, INDEX_TYPE),
    )
    for accessor_cls, target_types in accessor_targets:
        # accessors without any registered function are not exposed
        if not accessor_cls._api_count:
            continue
        for target in target_types:
            target.mf = CachedAccessor("mf", accessor_cls)


# run the installation once at import time and drop the helper afterwards
_install()
del _install
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from ..core import DataFrame, Index, Series
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BaseMaxFrameAccessor:
    """Base class of accessors that expose extension functions on an object.

    An accessor wraps a single object and forwards calls of registered
    functions to it, passing the wrapped object as the first argument.
    """

    def __init__(self, df_obj):
        self._df_obj = df_obj

    @classmethod
    def _register(cls, name, func):
        """Expose ``func`` as method ``name`` of this accessor class.

        The installed method calls ``func`` with the wrapped object followed
        by the caller's arguments. Metadata of ``func`` is copied onto the
        method, whose ``__name__`` is then set to ``name``. If the class
        defines ``_api_count``, the counter is incremented.
        """

        def forward(self, *args, **kw):
            return func(self._df_obj, *args, **kw)

        forward = functools.update_wrapper(forward, func)
        forward.__name__ = name
        setattr(cls, name, forward)

        if hasattr(cls, "_api_count"):  # pragma: no branch
            cls._api_count = cls._api_count + 1
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DataFrameMaxFrameAccessor(BaseMaxFrameAccessor):
    """Accessor collecting extension functions for DataFrame objects."""

    # wrapped object; the annotation is a forward reference for type checkers
    _df_obj: "DataFrame"
    # number of functions registered on this class through ``_register``
    _api_count: int = 0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SeriesMaxFrameAccessor(BaseMaxFrameAccessor):
    """Accessor collecting extension functions for Series objects."""

    # wrapped object; the annotation is a forward reference for type checkers
    _df_obj: "Series"
    # number of functions registered on this class through ``_register``
    _api_count: int = 0
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class IndexMaxFrameAccessor(BaseMaxFrameAccessor):
    """Accessor collecting extension functions for Index objects."""

    # wrapped object; the annotation is a forward reference for type checkers
    _df_obj: "Index"
    # number of functions registered on this class through ``_register``
    _api_count: int = 0
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, List, Optional, Union
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import get_output_types
|
|
21
|
+
from ...serialization.serializables import BoolField, ListField
|
|
22
|
+
from ..core import DataFrame, Index, IndexValue, Series
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import parse_index
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameReshuffle(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that reshuffles the data of a DataFrame, Series or Index.

    The output keeps the parameters of the input object except for the
    index metadata, which is adjusted in ``__call__``.
    """

    _op_type_ = opcodes.DATAFRAME_RESHUFFLE

    # columns used to group data while shuffling
    group_by = ListField("group_by")
    # columns used for sorting; ``df_reshuffle`` requires group_by with it
    sort_by = ListField("sort_by")
    ascending = BoolField("ascending", default=None)
    # when True the output index metadata is a fresh RangeIndex
    ignore_index = BoolField("ignore_index", default=False)

    def __call__(self, df: Union[DataFrame, Series, Index]):
        """Create the reshuffled tileable for ``df``.

        Parameters
        ----------
        df: Union[DataFrame, Series, Index]
            Object to reshuffle.

        Returns
        -------
        Tileable of the same output type as ``df`` whose parameters equal
        those of ``df`` apart from ``index_value``.
        """
        if self.ignore_index:
            idx_value = parse_index(pd.RangeIndex(0))
        else:
            idx_value = df.index_value
            if isinstance(idx_value.value, IndexValue.RangeIndex):
                # A range index is replaced with plain int64 index metadata
                # (presumably because rows no longer follow the range after
                # shuffling -- TODO confirm). ``pd.Int64Index`` was removed in
                # pandas 2.0, so build it via ``pd.Index`` with an explicit
                # dtype, which still yields an Int64Index on pandas 1.x.
                idx_value = parse_index(pd.Index([0], dtype="int64"))
        params = df.params
        params["index_value"] = idx_value
        self._output_types = get_output_types(df)
        return self.new_tileable([df], **params)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def df_reshuffle(
    df_obj,
    group_by: Optional[List[Any]] = None,
    sort_by: Optional[List[Any]] = None,
    ascending: bool = True,
    ignore_index: bool = False,
):
    """
    Shuffle data in DataFrame or Series to make data distribution more
    randomized.

    Parameters
    ----------
    group_by: Optional[List[Any]]
        Determine columns to group data while shuffling. A single string is
        treated as a one-element list.
    sort_by: Optional[List[Any]]
        Columns to sort by; can only be used together with ``group_by``.
        A single string is treated as a one-element list.
    ascending: bool
        Sort direction, forwarded unchanged to the reshuffle operator
        (presumably applies to ``sort_by`` -- TODO confirm).
    ignore_index: bool
        If True, the result gets a fresh range index instead of keeping the
        index of the input.

    Returns
    -------
    The reshuffled object created by the ``DataFrameReshuffle`` operator.

    Raises
    ------
    ValueError
        If ``sort_by`` is given without ``group_by``.
    """
    # accept bare strings as a shorthand for single-element lists
    group_by = [group_by] if isinstance(group_by, str) else group_by
    sort_by = [sort_by] if isinstance(sort_by, str) else sort_by

    if sort_by and not group_by:
        raise ValueError("to use sort_by requires group_by is specified")

    reshuffle_op = DataFrameReshuffle(
        group_by=group_by,
        sort_by=sort_by,
        ascending=ascending,
        ignore_index=ignore_index,
    )
    return reshuffle_op(df_obj)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
from .... import dataframe as md
|
|
19
|
+
from ...core import IndexValue
|
|
20
|
+
from ..reshuffle import DataFrameReshuffle
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_reshuffle():
    """Check argument validation and index metadata of ``mf.reshuffle``."""
    raw = pd.DataFrame(
        {"a": [3, 4, 5, 3, 5, 4, 1, 2, 3], "b": [1, 3, 4, 5, 6, 5, 4, 4, 4]}
    )
    mdf = md.DataFrame(raw, chunk_size=2)

    # sort_by without group_by is rejected
    with pytest.raises(ValueError):
        mdf.mf.reshuffle(sort_by="b")

    # default call: the range index of the input becomes an int64 index
    shuffled = mdf.mf.reshuffle()
    assert isinstance(shuffled.op, DataFrameReshuffle)
    assert isinstance(shuffled.index_value.value, IndexValue.Int64Index)

    # ignore_index=True: the result carries a range index
    reindexed = mdf.mf.reshuffle(ignore_index=True)
    assert isinstance(reindexed.op, DataFrameReshuffle)
    assert isinstance(reindexed.index_value.value, IndexValue.RangeIndex)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .core import DataFrameFetch, DataFrameFetchShuffle
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import OutputType, register_fetch_class
|
|
16
|
+
from ...core.operator import Fetch, FetchMixin, FetchShuffle
|
|
17
|
+
from ...serialization.serializables import FieldTypes, TupleField
|
|
18
|
+
from ...utils import on_deserialize_shape, on_serialize_shape
|
|
19
|
+
from ..operators import DataFrameOperatorMixin
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameFetchMixin(DataFrameOperatorMixin, FetchMixin):
    """Mixin combining the DataFrame operator mixin with the fetch mixin."""

    # the mixin itself declares no instance attributes
    __slots__ = ()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DataFrameFetch(Fetch, DataFrameFetchMixin):
    """Fetch operator for DataFrame-family outputs.

    Outputs declared as ``OutputType.df_or_series`` are resolved into a
    concrete series or dataframe output type when tileables are created.
    """

    # required fields
    # NOTE(review): the element type here is FieldTypes.tuple while
    # DataFrameFetchShuffle declares FieldTypes.int64 for its shape field --
    # confirm whether the difference is intended.
    _shape = TupleField(
        "shape",
        FieldTypes.tuple,
        on_serialize=on_serialize_shape,
        on_deserialize=on_deserialize_shape,
    )

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _extract_dataframe_or_series_kws(self, kws, **kw):
        """Resolve ``df_or_series`` outputs into concrete output types.

        For each declared output type paired with its kwargs: a
        ``df_or_series`` entry is replaced by ``series`` when its
        ``data_type`` is ``"series"`` and by ``dataframe`` otherwise, and its
        kwargs are replaced by the nested ``data_params``. Other entries are
        kept as they are. ``self._output_types`` is overwritten with the
        resolved types and the list of resolved kwargs is returned.
        """
        per_output_kws = [kw] if kws is None else kws

        resolved_kws = []
        resolved_types = []
        for declared_type, params in zip(self._output_types, per_output_kws):
            if declared_type != OutputType.df_or_series:
                # already concrete, keep untouched
                resolved_types.append(declared_type)
                resolved_kws.append(params)
                continue

            data_params = params["data_params"]
            is_series = params["data_type"] == "series"
            resolved_types.append(
                OutputType.series if is_series else OutputType.dataframe
            )
            resolved_kws.append(data_params)

        self._output_types = resolved_types
        return resolved_kws

    def _new_tileables(self, inputs, kws=None, **kw):
        # remember the key of the fetched object unless one is already set
        if "_key" in kw and self.source_key is None:
            self.source_key = kw["_key"]
        resolved_kws = self._extract_dataframe_or_series_kws(kws, **kw)
        return super()._new_tileables(inputs, kws=resolved_kws, **kw)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DataFrameFetchShuffle(FetchShuffle, DataFrameFetchMixin):
    """Shuffle-fetch operator for DataFrame-family outputs."""

    # required fields
    # shape is serialized / deserialized through the shape helper callbacks
    _shape = TupleField(
        "shape",
        FieldTypes.int64,
        on_serialize=on_serialize_shape,
        on_deserialize=on_deserialize_shape,
    )

    def __init__(self, output_types=None, **kw):
        # the public ``output_types`` argument maps onto the ``_output_types`` field
        super().__init__(_output_types=output_types, **kw)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Every DataFrame-family output type is fetched with the same pair of
# operators, so register them in one pass (order kept as before).
for _fetch_output_type in (
    OutputType.dataframe,
    OutputType.dataframe_groupby,
    OutputType.df_or_series,
    OutputType.series,
    OutputType.series_groupby,
    OutputType.index,
    OutputType.categorical,
):
    register_fetch_class(_fetch_output_type, DataFrameFetch, DataFrameFetchShuffle)
# do not leave the loop variable behind in the module namespace
del _fetch_output_type
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# noinspection PyUnresolvedReferences
|
|
16
|
+
from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
|
|
17
|
+
from .core import NamedAgg
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _install():
    """Attach groupby-related methods to DataFrame, Series and GroupBy types.

    DataFrame and Series types receive ``groupby``; GroupBy types receive the
    aggregation entry points, named reduction shortcuts that delegate to
    ``agg``, and the remaining groupby operations; DataFrame GroupBy types
    additionally receive ``__getitem__``.
    """
    from ..core import DATAFRAME_GROUPBY_TYPE, DATAFRAME_TYPE, GROUPBY_TYPE, SERIES_TYPE
    from .aggregation import agg
    from .apply import groupby_apply
    from .core import groupby
    from .cum import cumcount, cummax, cummin, cumprod, cumsum
    from .fill import bfill, ffill, fillna
    from .getitem import df_groupby_getitem
    from .head import head
    from .sample import groupby_sample
    from .transform import groupby_transform

    def _agg_shortcut(func_name):
        # a factory gives every shortcut its own ``func_name`` binding
        return lambda groupby, **kw: agg(groupby, func_name, **kw)

    # reductions exposed as methods that call ``agg`` with their own name
    reduction_names = (
        "sum",
        "prod",
        "max",
        "min",
        "count",
        "size",
        "mean",
        "var",
        "std",
        "all",
        "any",
        "skew",
        "kurt",
        "kurtosis",
        "sem",
        "nunique",
    )

    # methods installed as-is on every GroupBy type
    direct_methods = {
        "agg": agg,
        "aggregate": agg,
        "apply": groupby_apply,
        "transform": groupby_transform,
        "cumcount": cumcount,
        "cummin": cummin,
        "cummax": cummax,
        "cumprod": cumprod,
        "cumsum": cumsum,
        "head": head,
        "sample": groupby_sample,
        "ffill": ffill,
        "bfill": bfill,
        "backfill": bfill,
        "fillna": fillna,
    }

    for cls in DATAFRAME_TYPE:
        cls.groupby = groupby

    for cls in SERIES_TYPE:
        cls.groupby = groupby

    for cls in GROUPBY_TYPE:
        for reduction in reduction_names:
            setattr(cls, reduction, _agg_shortcut(reduction))
        for method_name, method in direct_methods.items():
            setattr(cls, method_name, method)

    for cls in DATAFRAME_GROUPBY_TYPE:
        cls.__getitem__ = df_groupby_getitem


# run the installation once at import time and drop the helper afterwards
_install()
del _install
|