maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from numbers import Integral
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import ENTITY_TYPE, ExecutableTuple, OutputType
|
|
22
|
+
from ...serialization.serializables import (
|
|
23
|
+
AnyField,
|
|
24
|
+
BoolField,
|
|
25
|
+
Int32Field,
|
|
26
|
+
KeyField,
|
|
27
|
+
StringField,
|
|
28
|
+
)
|
|
29
|
+
from ...tensor import tensor as astensor
|
|
30
|
+
from ...tensor.core import TensorOrder
|
|
31
|
+
from ..core import INDEX_TYPE, SERIES_TYPE
|
|
32
|
+
from ..initializer import Series as asseries
|
|
33
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
34
|
+
from ..utils import parse_index
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DataFrameCut(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that bins a 1-dimensional input into intervals.

    Calling the operator runs ``pd.cut`` on a one-element stand-in for the
    input to infer the metadata (dtype and container type) of the outputs,
    then creates the output tileables. One output is produced, or two when
    ``retbins`` is truthy (the second one describes the bins).
    """

    _op_type_ = opcodes.CUT

    _input = KeyField("input")
    bins = AnyField("bins", default=None)
    right = BoolField("right", default=None)
    labels = AnyField("labels", default=None)
    retbins = BoolField("retbins", default=None)
    precision = Int32Field("precision", default=None)
    include_lowest = BoolField("include_lowest", default=None)
    duplicates = StringField("duplicates", default=None)
    ordered = BoolField("ordered", default=None)

    @property
    def input(self):
        """The entity being binned (first input of the operator)."""
        return self._input

    @property
    def output_limit(self):
        """Number of outputs: 2 when bins are returned as well, else 1."""
        return 1 if not self.retbins else 2

    def _set_inputs(self, inputs):
        """Rebind ``_input``, ``bins`` and ``labels`` from the input list.

        The inputs are consumed in the same order in which ``__call__``
        appends them: the data first, then ``bins`` and ``labels`` when
        they are entities.
        """
        super()._set_inputs(inputs)
        inputs_iter = iter(self._inputs)
        self._input = next(inputs_iter)
        if isinstance(self.bins, ENTITY_TYPE):
            self.bins = next(inputs_iter)
        if isinstance(self.labels, ENTITY_TYPE):
            self.labels = next(inputs_iter)

    def __call__(self, x):
        """Build the output tileables for cutting ``x``.

        Parameters
        ----------
        x : pandas.Series, entity or array-like
            Data to bin. A pandas Series is converted with ``asseries``,
            any other non-entity value with ``astensor``.

        Returns
        -------
        ExecutableTuple
            The binned output, followed by the bins when ``retbins`` is set.

        Raises
        ------
        ValueError
            If ``x`` is not 1-dimensional or has size 0.
        """
        if isinstance(x, pd.Series):
            x = asseries(x)
        elif not isinstance(x, ENTITY_TYPE):
            x = astensor(x)
        if x.ndim != 1:
            raise ValueError("Input array must be 1 dimensional")
        if x.size == 0:
            raise ValueError("Cannot cut empty array")

        inputs = [x]
        if self.labels is not None and not isinstance(self.labels, (bool, ENTITY_TYPE)):
            self.labels = np.asarray(self.labels)

        # infer dtype from a one-element stand-in of the same dtype
        x_empty = (
            pd.Series([1], dtype=x.dtype)
            if isinstance(x, SERIES_TYPE)
            else np.asarray([1], dtype=x.dtype)
        )
        if isinstance(self.bins, INDEX_TYPE):
            bins = self.bins.index_value.to_pandas()
            inputs.append(self.bins)
            bins_unknown = True
        elif isinstance(self.bins, ENTITY_TYPE):
            bins = np.asarray([2], dtype=self.bins.dtype)
            inputs.append(self.bins)
            bins_unknown = True
        else:
            bins = self.bins
            # an integer only gives the number of bins, not their edges
            bins_unknown = isinstance(self.bins, Integral)
        if isinstance(self.labels, ENTITY_TYPE):
            bins_unknown = True
            labels = None
            inputs.append(self.labels)
        else:
            if self.labels is False or not bins_unknown:
                labels = self.labels
            else:
                labels = None

        cut_kwargs = {
            "right": self.right,
            "labels": labels,
            "retbins": True,
            "include_lowest": self.include_lowest,
            "duplicates": self.duplicates,
        }
        # Forward ``ordered=False`` only when explicit labels take part in the
        # inference: pandas rejects duplicate labels unless ordered=False, and
        # rejects ordered=False when no labels are given.
        if (
            self.ordered is False
            and labels is not None
            and not isinstance(labels, bool)
        ):
            cut_kwargs["ordered"] = False
        ret = pd.cut(x_empty, bins, **cut_kwargs)

        kws = []
        output_types = []
        if bins_unknown and isinstance(ret[0].dtype, pd.CategoricalDtype):
            # inaccurate dtype, just create an empty one
            out_dtype = pd.CategoricalDtype()
        else:
            out_dtype = ret[0].dtype
        if isinstance(ret[0], pd.Series):
            output_types.append(OutputType.series)
            kws.append(
                {
                    "dtype": out_dtype,
                    "shape": x.shape,
                    "index_value": x.index_value,
                    "name": x.name,
                }
            )
        elif isinstance(ret[0], np.ndarray):
            output_types.append(OutputType.tensor)
            kws.append(
                {"dtype": out_dtype, "shape": x.shape, "order": TensorOrder.C_ORDER}
            )
        else:
            assert isinstance(ret[0], pd.Categorical)
            output_types.append(OutputType.categorical)
            kws.append(
                {
                    "dtype": out_dtype,
                    "shape": x.shape,
                    "categories_value": parse_index(
                        out_dtype.categories, store_data=True
                    ),
                }
            )

        if self.retbins:
            if isinstance(self.bins, (pd.IntervalIndex, INDEX_TYPE)):
                # index-like bins are handed back with their own metadata
                output_types.append(OutputType.index)
                kws.append(
                    {
                        "dtype": self.bins.dtype,
                        "shape": self.bins.shape,
                        "index_value": self.bins.index_value
                        if isinstance(self.bins, INDEX_TYPE)
                        else parse_index(self.bins, store_data=False),
                        "name": self.bins.name,
                    }
                )
            else:
                output_types.append(OutputType.tensor)
                kws.append(
                    {
                        "dtype": ret[1].dtype,
                        # length is recorded as NaN when the probe yields no bins
                        "shape": ret[1].shape if ret[1].size > 0 else (np.nan,),
                        "order": TensorOrder.C_ORDER,
                    }
                )

        self.output_types = output_types
        return ExecutableTuple(self.new_tileables(inputs, kws=kws))
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def cut(
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    """
    Segment the values of a 1-dimensional input into discrete intervals.

    `cut` sorts data values into bins and is a convenient way to turn a
    continuous variable into a categorical one, for instance converting
    ages into groups of age ranges. Binning can be done either into a given
    number of equal-width bins or into a pre-specified array of bins.

    Parameters
    ----------
    x : array-like
        The input array to be binned. Must be 1-dimensional.
    bins : int, sequence of scalars, or IntervalIndex
        The criteria to bin by.

        * int : number of equal-width bins in the range of `x`. The range
          of `x` is extended by .1% on each side so that the minimum and
          maximum values of `x` are included.
        * sequence of scalars : the bin edges, which allows non-uniform
          widths. The range of `x` is not extended.
        * IntervalIndex : the exact bins to be used. An IntervalIndex
          passed as `bins` must be non-overlapping.

    right : bool, default True
        Whether `bins` includes the rightmost edge or not. With
        ``right == True`` (the default), the `bins` ``[1, 2, 3, 4]``
        indicate (1,2], (2,3], (3,4]. Ignored when `bins` is an
        IntervalIndex.
    labels : array or False, default None
        Labels for the returned bins. Must have the same length as the
        resulting bins. If False, only integer indicators of the bins are
        returned. This affects the type of the output container (see
        below). Ignored when `bins` is an IntervalIndex. If True, an error
        is raised.
    retbins : bool, default False
        Whether to return the bins or not. Useful when `bins` is provided
        as a scalar.
    precision : int, default 3
        The precision at which to store and display the bins labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive or not.
    duplicates : {default 'raise', 'drop'}, optional
        If bin edges are not unique, raise ValueError or drop non-uniques.
    ordered : bool, default True
        Whether the labels are ordered or not. Applies to the returned
        types Categorical and Series (with Categorical dtype). If True, the
        resulting categorical is ordered. If False, the resulting
        categorical is unordered (labels must be provided).

    Returns
    -------
    out : Categorical, Series, or Tensor
        An array-like object holding the bin each value of `x` falls into.
        Its type depends on the value of `labels`.

        * None (default) : a Series for Series `x`, a Categorical for all
          other inputs. The stored values are of Interval dtype.

        * sequence of scalars : a Series for Series `x`, a Categorical for
          all other inputs. The stored values are of whatever type the
          sequence holds.

        * False : a tensor of integers.

    bins : Tensor or IntervalIndex.
        The computed or specified bins. Only returned when `retbins=True`.
        For scalar or sequence `bins`, this is a tensor with the computed
        bins. With `duplicates=drop`, non-unique bins are dropped from
        `bins`. For an IntervalIndex `bins`, this is equal to `bins`.

    Raises
    ------
    ValueError
        If `bins` is an integer smaller than 1.

    See Also
    --------
    qcut : Discretize variable into equal-sized buckets based on rank
        or based on sample quantiles.
    Categorical : Array type for storing data that come from a
        fixed set of values.
    Series : One-dimensional array with axis labels (including time series).
    IntervalIndex : Immutable Index implementing an ordered, sliceable set.

    Notes
    -----
    Any NA values will be NA in the result. Out of bounds values will be NA
    in the resulting Series or Categorical object.

    Examples
    --------
    Discretize into three equal-sized bins.

    >>> import numpy as np
    >>> import pandas as pd
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3).execute()
    ... # doctest: +ELLIPSIS
    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3, retbins=True).execute()
    ... # doctest: +ELLIPSIS
    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
    array([0.994, 3.   , 5.   , 7.   ]))

    Discover the same bins, but assign them specific labels. Notice that
    the categories of the returned Categorical are `labels` and that it is
    ordered.

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]),
    ...        3, labels=["bad", "medium", "good"]).execute()
    [bad, good, medium, medium, good, bad]
    Categories (3, object): [bad < medium < good]

    ``ordered=False`` results in unordered categories when labels are
    passed. This parameter can be used to allow non-unique labels:

    >>> md.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
    ...        labels=["B", "A", "B"], ordered=False).execute()
    ['B', 'B', 'A', 'A', 'B', 'B']
    Categories (2, object): ['A', 'B']

    ``labels=False`` implies you just want the bins back.

    >>> md.cut([0, 1, 1, 2], bins=4, labels=False).execute()
    array([0, 1, 1, 3])

    Passing a Series as an input returns a Series with categorical dtype:

    >>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> md.cut(s, 3).execute()
    ... # doctest: +ELLIPSIS
    a    (1.992, 4.667]
    b    (1.992, 4.667]
    c    (4.667, 7.333]
    d     (7.333, 10.0]
    e     (7.333, 10.0]
    dtype: category
    Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...

    Passing a Series as an input returns a Series with mapping value.
    It is used to map numerically to intervals based on bins.

    >>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> md.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False).execute()
    ... # doctest: +ELLIPSIS
    (a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
     dtype: float64, array([0, 2, 4, 6, 8, 10]))

    Use the `drop` option when the bins are not unique.

    >>> md.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
    ...        right=False, duplicates='drop').execute()
    ... # doctest: +ELLIPSIS
    (a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
     dtype: float64, array([0, 2, 4, 6, 10]))

    Passing an IntervalIndex for `bins` results in those categories exactly.
    Notice that values not covered by the IntervalIndex are set to NaN. 0
    is to the left of the first bin (which is closed on the right), and 1.5
    falls between two bins.

    >>> bins = md.Index(pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]))
    >>> md.cut([0, 0.5, 1.5, 2.5, 4.5], bins).execute()
    [NaN, (0, 1], NaN, (2, 3], (4, 5]]
    Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
    """

    # An integer `bins` is a bin count and therefore has to be at least 1.
    if isinstance(bins, Integral) and bins < 1:
        raise ValueError("`bins` should be a positive integer")

    op_params = {
        "bins": bins,
        "right": right,
        "labels": labels,
        "retbins": retbins,
        "precision": precision,
        "include_lowest": include_lowest,
        "duplicates": duplicates,
        "ordered": ordered,
    }
    outputs = DataFrameCut(**op_params)(x)
    # Without `retbins` the caller only gets the binned data itself.
    return outputs if retbins else outputs[0]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import (
|
|
20
|
+
BoolField,
|
|
21
|
+
DictField,
|
|
22
|
+
KeyField,
|
|
23
|
+
StringField,
|
|
24
|
+
TupleField,
|
|
25
|
+
)
|
|
26
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
27
|
+
from ..utils import build_empty_series
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SeriesDatetimeMethod(DataFrameOperator, DataFrameOperatorMixin):
    """Operator standing for one attribute or method of the ``.dt`` accessor.

    ``method`` names the accessor member; ``is_property`` tells whether it
    is read as an attribute or called with ``method_args`` and
    ``method_kwargs``.
    """

    _op_type_ = opcodes.DATETIME_METHOD

    _input = KeyField("input")
    method = StringField("method", default=None)
    method_args = TupleField("method_args", default=None)
    method_kwargs = DictField("method_kwargs", default=None)
    is_property = BoolField("is_property", default=None)

    def __init__(self, output_types=None, **kw):
        """Create the operator; output types default to a single series."""
        super().__init__(_output_types=output_types, **kw)
        if self.output_types:
            return
        self.output_types = [OutputType.series]

    @property
    def input(self):
        """The entity the datetime method is applied to."""
        return self._input

    def _set_inputs(self, inputs):
        """Store the inputs and rebind ``_input`` to the first of them."""
        super()._set_inputs(inputs)
        first_input = self._inputs[0]
        self._input = first_input

    def __call__(self, inp):
        """Delegate to the handler registered for ``self.method``."""
        handler = _datetime_method_to_handlers[self.method]
        return handler.call(self, inp)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SeriesDatetimeMethodBaseHandler:
    """Default handler that builds the output of a ``.dt`` accessor call."""

    @classmethod
    def call(cls, op, inp):
        """Create the output series for applying ``op`` to ``inp``.

        The result dtype is inferred by evaluating the accessor member on
        an empty series built from ``inp.dtype``; shape, index and name are
        taken over from ``inp``.

        Parameters
        ----------
        op : operator
            Provides ``method``, ``is_property``, ``method_args`` and
            ``method_kwargs`` and is used to create the output series.
        inp : series entity
            Input whose ``dtype``, ``shape``, ``index_value`` and ``name``
            are read.

        Returns
        -------
        The series created by ``op.new_series``.
        """
        empty_series = build_empty_series(inp.dtype)
        member = getattr(empty_series.dt, op.method)
        if op.is_property:
            test_obj = member
        else:
            # Both fields default to None on the operator; treat that as
            # "no arguments" instead of failing while unpacking None.
            call_args = op.method_args if op.method_args is not None else ()
            call_kwargs = op.method_kwargs if op.method_kwargs is not None else {}
            test_obj = member(*call_args, **call_kwargs)
        dtype = test_obj.dtype
        return op.new_series(
            [inp],
            shape=inp.shape,
            dtype=dtype,
            index_value=inp.index_value,
            name=inp.name,
        )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Maps every public member name of the pandas ``Series.dt`` accessor to the
# handler class used to build its output.
_datetime_method_to_handlers = {}
for method in dir(pd.Series.dt):
    if method.startswith("_"):
        # private and dunder members are not registered
        continue
    _datetime_method_to_handlers[method] = SeriesDatetimeMethodBaseHandler
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes as OperandDef
|
|
19
|
+
from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
|
|
20
|
+
from ..core import SERIES_TYPE
|
|
21
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
+
from ..utils import build_empty_df, parse_index
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
    """Operator producing descriptive statistics of a DataFrame or Series.

    Output metadata is inferred by running pandas ``describe`` on an empty
    object that has the dtype(s) of the input.
    """

    _op_type_ = OperandDef.DESCRIBE

    input = KeyField("input", default=None)
    percentiles = ListField("percentiles", FieldTypes.float64, default=None)
    include = AnyField("include", default=None)
    exclude = AnyField("exclude", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _set_inputs(self, inputs):
        """Store the inputs and rebind ``input`` to the first of them."""
        super()._set_inputs(inputs)
        self.input = self._inputs[0]

    def __call__(self, df_or_series):
        """Create the output entity describing ``df_or_series``.

        Parameters
        ----------
        df_or_series : series or dataframe entity
            The data to describe.

        Returns
        -------
        A series for a series input, otherwise a dataframe.

        Raises
        ------
        NotImplementedError
            If the series dtype, or any dtype of the described frame, is
            not numeric.
        """
        if isinstance(df_or_series, SERIES_TYPE):
            if not np.issubdtype(df_or_series.dtype, np.number):
                raise NotImplementedError("non-numeric type is not supported for now")
            # NOTE(review): unlike the DataFrame branch below, the "50%" row
            # is not removed here when percentiles is empty - confirm that
            # this matches what the execution backend returns for a Series.
            test_series = pd.Series([], dtype=df_or_series.dtype).describe(
                percentiles=self.percentiles,
                include=self.include,
                exclude=self.exclude,
            )
            return self.new_series(
                [df_or_series],
                shape=(len(test_series),),
                dtype=test_series.dtype,
                index_value=parse_index(test_series.index, store_data=True),
            )
        else:
            test_inp_df = build_empty_df(df_or_series.dtypes)
            test_df = test_inp_df.describe(
                percentiles=self.percentiles,
                include=self.include,
                exclude=self.exclude,
            )
            # ``percentiles`` defaults to None on the operator, so guard the
            # length check instead of calling len() on None.
            if self.percentiles is not None and len(self.percentiles) == 0:
                # specify percentiles=False
                # Note: unlike pandas that False is illegal value for percentiles,
                # MaxFrame DataFrame allows user to specify percentiles=False
                # to skip computation about percentiles
                # errors="ignore": do not fail should pandas not have added
                # the median row in the first place.
                test_df.drop(["50%"], axis=0, inplace=True, errors="ignore")
            for dtype in test_df.dtypes:
                if not np.issubdtype(dtype, np.number):
                    raise NotImplementedError(
                        "non-numeric type is not supported for now"
                    )
            return self.new_dataframe(
                [df_or_series],
                shape=test_df.shape,
                dtypes=test_df.dtypes,
                index_value=parse_index(test_df.index, store_data=True),
                columns_value=parse_index(test_df.columns, store_data=True),
            )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def describe(df_or_series, percentiles=None, include=None, exclude=None):
    """Build descriptive statistics for a DataFrame or Series.

    Parameters
    ----------
    df_or_series : dataframe or series
        The data to describe.
    percentiles : list-like of numbers, False or None, default None
        Percentiles to include, each within [0, 1]. ``None`` selects
        ``[0.25, 0.5, 0.75]``. ``False`` passes an empty list on to the
        operator. For any other value the median (0.5) is added when
        missing and the values are sorted.
    include, exclude
        Handed to the operator unchanged.

    Returns
    -------
    The result of calling ``DataFrameDescribe`` on ``df_or_series``.

    Raises
    ------
    ValueError
        If a percentile lies outside [0, 1], or if the percentiles contain
        duplicates once the median has been added.
    """
    if percentiles is None:
        percentiles = [0.25, 0.5, 0.75]
    elif percentiles is False:
        percentiles = []
    else:
        requested = list(percentiles)
        for pct in requested:
            if p_out_of_range := (pct < 0 or pct > 1):
                raise ValueError(
                    "percentiles should all be in the interval [0, 1]. "
                    "Try [{0:.3f}] instead.".format(pct / 100)
                )
        # median should always be included
        if 0.5 not in requested:
            requested.append(0.5)
        pct_array = np.asarray(requested)

        # np.unique sorts the values; a shorter result reveals duplicates
        unique_pcts = np.unique(pct_array)
        if len(pct_array) > len(unique_pcts):
            raise ValueError("percentiles cannot contain duplicates")
        percentiles = unique_pcts.tolist()

    describe_op = DataFrameDescribe(
        percentiles=percentiles, include=include, exclude=exclude
    )
    return describe_op(df_or_series)
|