maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes
|
|
19
|
+
from ...serialization.serializables import BoolField
|
|
20
|
+
from ..operators import OutputType
|
|
21
|
+
from ..utils import gen_unknown_index_value, parse_index
|
|
22
|
+
from ._duplicate import DuplicateOperand, validate_subset
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DataFrameDropDuplicates(DuplicateOperand):
    """
    Operand registered under ``opcodes.DROP_DUPLICATES``.

    Calling an instance on an input tileable creates a new tileable whose
    parameters are derived from the input: the index value is replaced and
    the leading dimension of the shape is set to NaN.
    """

    _op_type_ = opcodes.DROP_DUPLICATES

    # Serializable flag; when truthy the result gets a placeholder RangeIndex
    # instead of an index value derived from the input's index.
    ignore_index = BoolField("ignore_index", default=True)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @classmethod
    def _get_shape(cls, input_shape, op: "DataFrameDropDuplicates"):
        """
        Build the output shape from ``input_shape``.

        The first dimension is always NaN (presumably "unknown until
        executed" -- TODO confirm); remaining dimensions are copied from
        the input.
        """
        trailing_dims = input_shape[1:]
        out_shape = (np.nan,) + trailing_dims
        wants_dataframe = op.output_types[0] == OutputType.dataframe
        if wants_dataframe and len(out_shape) == 1:
            # A dataframe output needs a second dimension even when the
            # input shape was one-dimensional.
            # NOTE(review): the constant 3 is taken as-is; its meaning is
            # not visible here.
            return out_shape + (3,)
        return out_shape

    @classmethod
    def _gen_tileable_params(cls, op: "DataFrameDropDuplicates", input_params):
        """
        Derive the output tileable params from a copy of ``input_params``.

        Only ``index_value`` and ``shape`` are overwritten; every other
        entry is carried over unchanged. ``input_params`` itself is not
        modified.
        """
        out_params = input_params.copy()
        if not op.ignore_index:
            # Index labels are kept, so derive a new unknown index value
            # from the input index and the operand's settings.
            new_index_value = gen_unknown_index_value(
                input_params["index_value"], op.keep, op.subset, type(op).__name__
            )
        else:
            # Index is reset; RangeIndex(-1) serves as a placeholder.
            new_index_value = parse_index(pd.RangeIndex(-1))
        out_params["index_value"] = new_index_value
        out_params["shape"] = cls._get_shape(input_params["shape"], op)
        return out_params

    def __call__(self, inp, inplace=False):
        """
        Apply this operand to ``inp`` and return the new tileable.

        When ``inplace`` is truthy, ``inp.data`` is additionally rebound to
        the data of the returned tileable.
        """
        # The output types mirror those of the operand that produced inp.
        self._output_types = inp.op.output_types
        out_params = self._gen_tileable_params(self, inp.params)

        result = self.new_tileable([inp], kws=[out_params])
        if inplace:
            inp.data = result.data
        return result
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def df_drop_duplicates(
    df, subset=None, keep="first", inplace=False, ignore_index=False, method="auto"
):
    """
    Return DataFrame with duplicate rows removed.

    Considering certain columns is optional. Indexes, including time indexes
    are ignored.

    Parameters
    ----------
    subset : column label or sequence of labels, optional
        Only consider certain columns for identifying duplicates, by
        default use all of the columns.
    keep : {'first', 'last', False}, default 'first'
        Determines which duplicates (if any) to keep.

        - ``first`` : Drop duplicates except for the first occurrence.
        - ``last`` : Drop duplicates except for the last occurrence.
        - False : Drop all duplicates.
    inplace : bool, default False
        If True, the data of ``df`` is also replaced by the data of the
        result.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.
    method : {'auto', 'tree', 'subset_tree', 'shuffle', None}, default 'auto'
        Value forwarded unchanged to the operand after validation.

    Returns
    -------
    DataFrame
        DataFrame with duplicates removed. The operand call returns this
        object for ``inplace=True`` as well.

    Raises
    ------
    ValueError
        If ``keep`` or ``method`` is not one of the accepted values.
    """
    allowed_keep = ("first", "last", False)
    allowed_methods = ("auto", "tree", "subset_tree", "shuffle", None)

    if keep not in allowed_keep:
        raise ValueError("keep could only be one of 'first', 'last' or False")
    if method not in allowed_methods:
        raise ValueError(
            "method could only be one of "
            "'auto', 'tree', 'subset_tree', 'shuffle' or None"
        )

    checked_subset = validate_subset(df, subset)
    dedup_op = DataFrameDropDuplicates(
        subset=checked_subset, keep=keep, ignore_index=ignore_index, method=method
    )
    return dedup_op(df, inplace=inplace)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def series_drop_duplicates(
    series, keep="first", inplace=False, ignore_index=False, method="auto"
):
    """
    Return Series with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        Method to handle dropping duplicates:

        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    inplace : bool, default ``False``
        If ``True``, the data of ``series`` is also replaced by the data of
        the result.
    ignore_index : bool, default ``False``
        Forwarded to the operand; when true the result metadata uses a
        fresh range index instead of one derived from the input.
    method : {'auto', 'tree', 'shuffle', None}, default 'auto'
        Value forwarded unchanged to the operand after validation.

    Returns
    -------
    Series
        Series with duplicates dropped. The operand call returns this
        object for ``inplace=True`` as well.

    Raises
    ------
    ValueError
        If ``keep`` or ``method`` is not one of the accepted values.

    See Also
    --------
    Index.drop_duplicates : Equivalent method on Index.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Series.duplicated : Related method on Series, indicating duplicate
        Series values.

    Examples
    --------
    Generate a Series with duplicated entries.

    >>> import maxframe.dataframe as md
    >>> s = md.Series(['lame', 'cow', 'lame', 'beetle', 'lame', 'hippo'],
    ...               name='animal')
    >>> s.execute()
    0      lame
    1       cow
    2      lame
    3    beetle
    4      lame
    5     hippo
    Name: animal, dtype: object

    With the 'keep' parameter, the selection behaviour of duplicated values
    can be changed. The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> s.drop_duplicates().execute()
    0      lame
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object

    The value 'last' for parameter 'keep' keeps the last occurrence for
    each set of duplicated entries.

    >>> s.drop_duplicates(keep='last').execute()
    1       cow
    3    beetle
    4      lame
    5     hippo
    Name: animal, dtype: object

    The value ``False`` for parameter 'keep' discards all sets of
    duplicated entries. Setting the value of 'inplace' to ``True`` performs
    the operation inplace.

    >>> s.drop_duplicates(keep=False, inplace=True)
    >>> s.execute()
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object
    """
    allowed_keep = ("first", "last", False)
    allowed_methods = ("auto", "tree", "shuffle", None)

    if keep not in allowed_keep:
        raise ValueError("keep could only be one of 'first', 'last' or False")
    if method not in allowed_methods:
        raise ValueError(
            "method could only be one of 'auto', 'tree', 'shuffle' or None"
        )

    dedup_op = DataFrameDropDuplicates(
        keep=keep, ignore_index=ignore_index, method=method
    )
    return dedup_op(series, inplace=inplace)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def index_drop_duplicates(index, keep="first", method="auto"):
    """
    Return Index with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.
    method : {'auto', 'tree', 'shuffle', None}, default 'auto'
        Value forwarded unchanged to the operand after validation.

    Returns
    -------
    deduplicated : Index

    Raises
    ------
    ValueError
        If ``keep`` or ``method`` is not one of the accepted values.

    See Also
    --------
    Series.drop_duplicates : Equivalent method on Series.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Index.duplicated : Related method on Index, indicating duplicate
        Index values.

    Examples
    --------
    Generate a pandas.Index with duplicate values.

    >>> import maxframe.dataframe as md

    >>> idx = md.Index(['lame', 'cow', 'lame', 'beetle', 'lame', 'hippo'])

    The `keep` parameter controls which duplicate values are removed.
    The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> idx.drop_duplicates(keep='first').execute()
    Index(['lame', 'cow', 'beetle', 'hippo'], dtype='object')

    The value 'last' keeps the last occurrence for each set of duplicated
    entries.

    >>> idx.drop_duplicates(keep='last').execute()
    Index(['cow', 'beetle', 'lame', 'hippo'], dtype='object')

    The value ``False`` discards all sets of duplicated entries.

    >>> idx.drop_duplicates(keep=False).execute()
    Index(['cow', 'beetle', 'hippo'], dtype='object')
    """
    allowed_keep = ("first", "last", False)
    allowed_methods = ("auto", "tree", "shuffle", None)

    if keep not in allowed_keep:
        raise ValueError("keep could only be one of 'first', 'last' or False")
    if method not in allowed_methods:
        raise ValueError(
            "method could only be one of 'auto', 'tree', 'shuffle' or None"
        )

    # NOTE(review): ``ignore_index`` is not passed here, so the operand's own
    # field default (True on DataFrameDropDuplicates) applies and the result's
    # index_value is built from a RangeIndex -- confirm this is intended for
    # Index inputs.
    dedup_op = DataFrameDropDuplicates(keep=keep, method=method)
    return dedup_op(index)
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ._duplicate import DuplicateOperand, validate_subset
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameDuplicated(DuplicateOperand):
    """
    Operand describing a ``duplicated`` call.

    The output is always declared as a boolean Series that shares the index
    value of the input.
    """

    _op_type_ = opcodes.DUPLICATED

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @classmethod
    def _get_shape(cls, input_shape, op):
        """Return a 1-d shape holding only the first dimension of the input."""
        row_count = input_shape[0]
        return (row_count,)

    @classmethod
    def _gen_tileable_params(cls, op: "DataFrameDuplicated", input_params):
        """
        Build the params of the output Series from the params of the input.

        The index value is reused as-is, the dtype is fixed to bool and the
        name is taken from the input when it has one (``None`` otherwise).
        """
        # duplicated() always returns a Series
        return dict(
            shape=cls._get_shape(input_params["shape"], op),
            index_value=input_params["index_value"],
            dtype=np.dtype(bool),
            name=input_params.get("name"),
        )

    def __call__(self, inp, inplace=False):
        """
        Apply the operand to ``inp`` and return the new Series tileable.

        ``inplace`` is accepted but not used by this operand.
        """
        self._output_types = [OutputType.series]
        return self.new_tileable(
            [inp], kws=[self._gen_tileable_params(self, inp.params)]
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def df_duplicated(df, subset=None, keep="first", method="auto"):
    """
    Return boolean Series denoting duplicate rows.

    Considering certain columns is optional.

    Parameters
    ----------
    subset : column label or sequence of labels, optional
        Only consider certain columns for identifying duplicates, by
        default use all of the columns.
    keep : {'first', 'last', False}, default 'first'
        Determines which duplicates (if any) to mark.

        - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
        - False : Mark all duplicates as ``True``.
    method : {'auto', 'tree', 'subset_tree', 'shuffle', None}, default 'auto'
        Value forwarded unchanged to the operand after validation.

    Returns
    -------
    Series
        Boolean series marking each duplicated row.

    Raises
    ------
    ValueError
        If ``keep`` or ``method`` is not one of the accepted values.

    See Also
    --------
    Index.duplicated : Equivalent method on index.
    Series.duplicated : Equivalent method on Series.
    Series.drop_duplicates : Remove duplicate values from Series.
    DataFrame.drop_duplicates : Remove duplicate values from DataFrame.

    Examples
    --------
    Consider dataset containing ramen rating.

    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({
    ...     'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
    ...     'style': ['cup', 'cup', 'cup', 'pack', 'pack'],
    ...     'rating': [4, 4, 3.5, 15, 5]
    ... })
    >>> df.execute()
         brand style  rating
    0  Yum Yum   cup     4.0
    1  Yum Yum   cup     4.0
    2  Indomie   cup     3.5
    3  Indomie  pack    15.0
    4  Indomie  pack     5.0

    By default, for each set of duplicated values, the first occurrence
    is set on False and all others on True.

    >>> df.duplicated().execute()
    0    False
    1     True
    2    False
    3    False
    4    False
    dtype: bool

    By using 'last', the last occurrence of each set of duplicated values
    is set on False and all others on True.

    >>> df.duplicated(keep='last').execute()
    0     True
    1    False
    2    False
    3    False
    4    False
    dtype: bool

    By setting ``keep`` on False, all duplicates are True.

    >>> df.duplicated(keep=False).execute()
    0     True
    1     True
    2    False
    3    False
    4    False
    dtype: bool

    To find duplicates on specific column(s), use ``subset``.

    >>> df.duplicated(subset=['brand']).execute()
    0    False
    1     True
    2    False
    3     True
    4     True
    dtype: bool
    """
    # Reject an unsupported ``keep`` up front, the same way the
    # drop_duplicates entry points do, instead of handing it to the operand.
    if keep not in ("first", "last", False):
        raise ValueError("keep could only be one of 'first', 'last' or False")
    if method not in ("auto", "tree", "subset_tree", "shuffle", None):
        raise ValueError(
            "method could only be one of "
            "'auto', 'tree', 'subset_tree', 'shuffle' or None"
        )
    subset = validate_subset(df, subset)
    op = DataFrameDuplicated(subset=subset, keep=keep, method=method)
    return op(df)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def series_duplicated(series, keep="first", method="auto"):
    """
    Indicate duplicate Series values.

    Duplicated values are indicated as ``True`` values in the resulting
    Series. Either all duplicates, all except the first or all except the
    last occurrence of duplicates can be indicated.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        Method to handle marking duplicates:

        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.
    method : {'auto', 'tree', 'shuffle', None}, default 'auto'
        Value forwarded unchanged to the operand after validation.

    Returns
    -------
    Series
        Series indicating whether each value has occurred in the
        preceding values.

    Raises
    ------
    ValueError
        If ``keep`` or ``method`` is not one of the accepted values.

    See Also
    --------
    Index.duplicated : Equivalent method on pandas.Index.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Series.drop_duplicates : Remove duplicate values from Series.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence is
    set on False and all others on True:

    >>> import maxframe.dataframe as md

    >>> animals = md.Series(['lame', 'cow', 'lame', 'beetle', 'lame'])
    >>> animals.duplicated().execute()
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    which is equivalent to

    >>> animals.duplicated(keep='first').execute()
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    By using 'last', the last occurrence of each set of duplicated values
    is set on False and all others on True:

    >>> animals.duplicated(keep='last').execute()
    0     True
    1    False
    2     True
    3    False
    4    False
    dtype: bool

    By setting keep on ``False``, all duplicates are True:

    >>> animals.duplicated(keep=False).execute()
    0     True
    1    False
    2     True
    3    False
    4     True
    dtype: bool
    """
    # Reject an unsupported ``keep`` up front, the same way the
    # drop_duplicates entry points do, instead of handing it to the operand.
    if keep not in ("first", "last", False):
        raise ValueError("keep could only be one of 'first', 'last' or False")
    if method not in ("auto", "tree", "shuffle", None):
        raise ValueError(
            "method could only be one of 'auto', 'tree', 'shuffle' or None"
        )
    op = DataFrameDuplicated(keep=keep, method=method)
    return op(series)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def index_duplicated(index, keep="first"):
    """
    Indicate duplicate index values.

    Duplicated values are indicated as ``True`` values in the resulting
    array. Either all duplicates, all except the first, or all except the
    last occurrence of duplicates can be indicated.

    The index is converted to a Series, ``duplicated`` is evaluated on that
    Series and the outcome is converted to a tensor.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        Which occurrences in a set of duplicates are marked.

        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.

    Returns
    -------
    Tensor

    See Also
    --------
    Series.duplicated : Equivalent method on pandas.Series.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Index.drop_duplicates : Remove duplicate values from Index.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence is
    set to False and all others to True:

    >>> import maxframe.dataframe as md

    >>> idx = md.Index(['lame', 'cow', 'lame', 'beetle', 'lame'])
    >>> idx.duplicated().execute()
    array([False, False,  True, False,  True])

    which is equivalent to

    >>> idx.duplicated(keep='first').execute()
    array([False, False,  True, False,  True])

    By using 'last', the last occurrence of each set of duplicated values
    is set on False and all others on True:

    >>> idx.duplicated(keep='last').execute()
    array([ True, False,  True, False, False])

    By setting keep on ``False``, all duplicates are True:

    >>> idx.duplicated(keep=False).execute()
    array([ True, False,  True, False,  True])
    """
    as_series = index.to_series()
    duplicate_flags = as_series.duplicated(keep=keep)
    return duplicate_flags.to_tensor()
|