maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes
|
|
19
|
+
from ...core import OutputType
|
|
20
|
+
from ...serialization.serializables import AnyField, BoolField
|
|
21
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
+
from ..utils import parse_index
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DataFrameExplode(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing an ``explode`` applied to a DataFrame or Series.

    Calling an instance with an input object creates a new tileable whose
    parameters are derived from the input by :meth:`_rewrite_params`.
    """

    _op_type_ = opcodes.EXPLODE

    # Column argument of the explode call; declared as AnyField, so no type
    # restriction is visible here.
    column = AnyField("column", default=None)
    # NOTE(review): the field tag is "ignore_field" although the attribute is
    # named ``ignore_index``. Kept as-is because the tag may be part of the
    # serialized form -- confirm before renaming.
    ignore_index = BoolField("ignore_field", default=None)

    def __init__(self, output_types=None, **kw):
        """Forward ``output_types`` to the base class as ``_output_types``."""
        super().__init__(_output_types=output_types, **kw)

    def _rewrite_params(self, in_obj):
        """Build the parameter dict of the output from the input object.

        Parameters
        ----------
        in_obj
            Input object; must expose ``params``, ``shape``, ``key`` and
            ``index_value.key``.

        Returns
        -------
        dict
            A copy of ``in_obj.params`` with ``shape`` and ``index_value``
            replaced.
        """
        out_params = in_obj.params.copy()

        # The first dimension becomes NaN -- presumably because the number of
        # rows is not known before execution.
        dims = list(in_obj.shape)
        dims[0] = np.nan
        out_params["shape"] = tuple(dims)

        if not self.ignore_index:
            index_value = parse_index(None, (in_obj.key, in_obj.index_value.key))
        else:
            # ``pd.RangeIndex(-1)`` looks like a placeholder for a range index
            # of unknown length -- TODO confirm against ``parse_index``.
            index_value = parse_index(
                pd.RangeIndex(-1), (in_obj.key, in_obj.index_value.key)
            )
        out_params["index_value"] = index_value
        return out_params

    def __call__(self, df_or_series):
        """Create the output tileable for ``df_or_series``."""
        out_params = self._rewrite_params(df_or_series)
        return self.new_tileable([df_or_series], **out_params)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def df_explode(df, column, ignore_index=False):
    """
    Transform each element of a list-like to a row, replicating index values.

    Parameters
    ----------
    column : str or tuple
        Column to explode.
    ignore_index : bool, default False
        If True, the resulting index will be labeled 0, 1, …, n - 1.

    Returns
    -------
    DataFrame
        Exploded lists to rows of the subset columns;
        index will be duplicated for these rows.

    Raises
    ------
    ValueError :
        if columns of the frame are not unique.

    See Also
    --------
    DataFrame.unstack : Pivot a level of the (necessarily hierarchical)
        index labels.
    DataFrame.melt : Unpivot a DataFrame from wide format to long format.
    Series.explode : Explode a DataFrame from list-like columns to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples,
    Series, and np.ndarray. The result dtype of the subset rows will
    be object. Scalars will be returned unchanged. Empty list-likes will
    result in a np.nan for that row.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
    >>> df.execute()
               A  B
    0  [1, 2, 3]  1
    1        foo  1
    2         []  1
    3     [3, 4]  1

    >>> df.explode('A').execute()
         A  B
    0    1  1
    0    2  1
    0    3  1
    1  foo  1
    2  NaN  1
    3    3  1
    3    4  1
    """
    # Build the operator and apply it to the input in one step.
    return DataFrameExplode(
        column=column,
        ignore_index=ignore_index,
        output_types=[OutputType.dataframe],
    )(df)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def series_explode(series, ignore_index=False):
    """
    Transform each element of a list-like to a row.

    Parameters
    ----------
    ignore_index : bool, default False
        If True, the resulting index will be labeled 0, 1, …, n - 1.

    Returns
    -------
    Series
        Exploded lists to rows; index will be duplicated for these rows.

    See Also
    --------
    Series.str.split : Split string values on specified separator.
    Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
        to produce DataFrame.
    DataFrame.melt : Unpivot a DataFrame from wide format to long format.
    DataFrame.explode : Explode a DataFrame from list-like
        columns to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples,
    Series, and np.ndarray. The result dtype of the subset rows will
    be object. Scalars will be returned unchanged. Empty list-likes will
    result in a np.nan for that row.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s = md.Series([[1, 2, 3], 'foo', [], [3, 4]])
    >>> s.execute()
    0    [1, 2, 3]
    1          foo
    2           []
    3       [3, 4]
    dtype: object

    >>> s.explode().execute()
    0      1
    0      2
    0      3
    1    foo
    2    NaN
    3      3
    3      4
    dtype: object
    """
    # No column is passed for a Series; the operator's ``column`` keeps its
    # default of None.
    return DataFrameExplode(
        ignore_index=ignore_index,
        output_types=[OutputType.series],
    )(series)
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import AnyField, BoolField, ListField, StringField
|
|
20
|
+
from ..datasource.dataframe import from_pandas as from_pandas_df
|
|
21
|
+
from ..datasource.series import from_pandas as from_pandas_series
|
|
22
|
+
from ..initializer import Series as asseries
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
|
|
25
|
+
# NumPy dtype ``kind`` codes ("O" object, "S" byte string, "U" unicode string)
# that DataFrameGetDummies.__call__ counts as columns to be encoded when no
# explicit ``columns`` list is given.
_encoding_dtype_kind = ["O", "S", "U"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing a ``get_dummies`` (dummy/indicator encoding) call.

    ``__call__`` normalizes the input data, validates ``prefix`` against the
    columns to be encoded and builds the output DataFrame, whose shape,
    dtypes and columns are left unknown.
    """

    prefix = AnyField("prefix", default=None)
    prefix_sep = StringField("prefix_sep", default=None)
    dummy_na = BoolField("dummy_na", default=None)
    columns = ListField("columns", default=None)
    sparse = BoolField("sparse", default=None)
    drop_first = BoolField("drop_first", default=None)
    dtype = AnyField("dtype", default=None)

    def __init__(self, **kws):
        super().__init__(**kws)
        self.output_types = [OutputType.dataframe]

    @staticmethod
    def _check_prefix_length(prefix_num, encoding_col_num):
        """Raise ValueError when the prefix count and column count differ."""
        if prefix_num != encoding_col_num:
            raise ValueError(
                f"Length of 'prefix' ({prefix_num}) did not match "
                f"the length of the columns being encoded ({encoding_col_num})"
            )

    def __call__(self, data):
        """Build the output DataFrame for ``data``.

        Parameters
        ----------
        data : list, tuple, pandas Series/DataFrame or an existing entity
            Lists, tuples and pandas objects are converted first; anything
            else is used as given.

        Returns
        -------
        The new DataFrame created by ``new_dataframe``; it reuses the index
        value of ``data``.

        Raises
        ------
        ValueError
            If the number of prefixes differs from the number of columns
            being encoded.
        KeyError
            If ``prefix`` is a dict containing a key that is not listed in
            ``columns``.
        """
        if isinstance(data, (list, tuple)):
            data = asseries(data)
        elif isinstance(data, pd.Series):
            data = from_pandas_series(data)
        elif isinstance(data, pd.DataFrame):
            data = from_pandas_df(data)

        if isinstance(self.prefix, list):
            if self.columns is not None:
                encoding_col_num = len(self.columns)
            else:
                # Without explicit columns, count the columns whose dtype
                # kind marks them as encodable.
                encoding_col_num = sum(
                    1
                    for col_dtype in data.dtypes.values
                    if col_dtype.kind in _encoding_dtype_kind
                )
            self._check_prefix_length(len(self.prefix), encoding_col_num)
        elif isinstance(self.prefix, dict):
            if self.columns is not None:
                self._check_prefix_length(len(self.prefix), len(self.columns))
                # A dict is a mapping, so its key order must not matter: only
                # require every key to name one of the columns. Together with
                # the length check this makes keys and columns correspond
                # one-to-one.
                for key in self.prefix:
                    if key not in self.columns:
                        raise KeyError(f"{key}")
                # Convert prefix from dict to list, to simplify tile work;
                # prefixes are ordered to line up with ``columns``.
                self.prefix = [self.prefix[col] for col in self.columns]
            else:
                self.columns = list(self.prefix.keys())
                # Convert prefix from dict to list, to simplify tile work
                self.prefix = list(self.prefix.values())

        return self.new_dataframe(
            [data],
            shape=(np.nan, np.nan),
            dtypes=None,
            index_value=data.index_value,
            columns_value=None,
        )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_dummies(
    data,
    prefix=None,
    prefix_sep="_",
    dummy_na=False,
    columns=None,
    sparse=False,
    drop_first=False,
    dtype=None,
):
    """
    Convert categorical variable into dummy/indicator variables.

    Parameters
    ----------
    data : array-like, Series, or DataFrame
        Data of which to get dummy indicators.
    prefix : str, list of str, or dict of str, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : str, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix`.
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `object` or `category` dtype will be converted.
        Any list-like (list, tuple, Index, ...) is accepted and is
        converted to a list.
    sparse : bool, default False
        Whether the dummy-encoded columns should be backed by
        a :class:`SparseArray` (True) or a regular NumPy array (False).
    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.
    dtype : dtype, default np.uint8
        Data type for new columns. Only a single dtype is allowed.

    Returns
    -------
    DataFrame
        Dummy-coded data.

    Raises
    ------
    TypeError
        If `columns` is given but is not list-like (e.g. a single string).

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import maxframe.dataframe as md
    >>> s = md.Series(list('abca'))

    >>> md.get_dummies(s).execute()
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0

    >>> s1 = ['a', 'b', np.nan]

    >>> md.get_dummies(s1).execute()
       a  b
    0  1  0
    1  0  1
    2  0  0

    >>> md.get_dummies(s1, dummy_na=True).execute()
       a  b  NaN
    0  1  0    0
    1  0  1    0
    2  0  0    1

    >>> df = md.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
    ...                    'C': [1, 2, 3]})

    >>> md.get_dummies(df, prefix=['col1', 'col2']).execute()
       C  col1_a  col1_b  col2_a  col2_b  col2_c
    0  1       1       0       0       1       0
    1  2       0       1       1       0       0
    2  3       1       0       0       0       1

    >>> md.get_dummies(pd.Series(list('abcaa'))).execute()
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0
    4  1  0  0

    >>> md.get_dummies(pd.Series(list('abcaa')), drop_first=True).execute()
       b  c
    0  0  0
    1  1  0
    2  0  1
    3  0  0
    4  0  0

    >>> md.get_dummies(pd.Series(list('abc')), dtype=float).execute()
         a    b    c
    0  1.0  0.0  0.0
    1  0.0  1.0  0.0
    2  0.0  0.0  1.0
    """
    if columns is not None:
        # Accept any list-like, as documented, rather than only ``list``;
        # strings and scalars are still rejected.
        if not pd.api.types.is_list_like(columns):
            raise TypeError("Input must be a list-like for parameter `columns`")
        # The operator stores ``columns`` in a ListField, so normalize to list.
        columns = list(columns)

    op = DataFrameGetDummies(
        prefix=prefix,
        prefix_sep=prefix_sep,
        dummy_na=dummy_na,
        columns=columns,
        sparse=sparse,
        drop_first=drop_first,
        dtype=dtype,
    )

    return op(data)
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from pandas.api.types import is_list_like
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE
|
|
21
|
+
from ...serialization.serializables import AnyField, KeyField
|
|
22
|
+
from ...tensor.core import TENSOR_TYPE
|
|
23
|
+
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
24
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameIsin(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing an ``isin`` test on a Series or DataFrame.

    ``values`` may be a plain object, a single entity, or a dict whose
    values may themselves be entities; entities are registered as extra
    operator inputs after the tested object.
    """

    _op_type_ = opcodes.ISIN

    input = KeyField("input")
    values = AnyField("values", default=None)

    def _set_inputs(self, inputs):
        """Re-bind ``input`` and any entity held in ``values`` to the inputs."""
        super()._set_inputs(inputs)
        remaining = iter(self._inputs)
        self.input = next(remaining)

        if len(self._inputs) <= 1:
            # ``values`` holds no entity, nothing to re-bind.
            return

        if not isinstance(self.values, dict):
            self.values = self._inputs[1]
            return

        # Entities inside the dict were appended in iteration order by
        # ``__call__``, so they are consumed in that same order here.
        self.values = {
            key: next(remaining) if isinstance(item, ENTITY_TYPE) else item
            for key, item in self.values.items()
        }

    def __call__(self, elements):
        """Build the boolean output with the same shape and index as ``elements``."""
        op_inputs = [elements]
        if isinstance(self.values, ENTITY_TYPE):
            op_inputs.append(self.values)
        elif isinstance(self.values, dict):
            op_inputs.extend(
                item for item in self.values.values() if isinstance(item, ENTITY_TYPE)
            )

        if elements.ndim != 1:
            # Two-dimensional input: every output column is boolean.
            bool_dtypes = pd.Series(
                [np.dtype(bool) for _ in elements.dtypes],
                index=elements.dtypes.index,
            )
            return self.new_dataframe(
                op_inputs,
                shape=elements.shape,
                index_value=elements.index_value,
                columns_value=elements.columns_value,
                dtypes=bool_dtypes,
            )

        return self.new_series(
            op_inputs,
            shape=elements.shape,
            dtype=np.dtype("bool"),
            index_value=elements.index_value,
            name=elements.name,
        )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def series_isin(elements, values):
    """
    Whether elements in Series are contained in `values`.

    Return a boolean Series showing whether each element in the Series
    matches an element in the passed sequence of `values` exactly.

    Parameters
    ----------
    values : set or list-like
        The sequence of values to test. Passing in a single string will
        raise a ``TypeError``. Instead, turn a single string into a
        list of one element.

    Returns
    -------
    Series
        Series of booleans indicating if each element is in values.

    Raises
    ------
    TypeError
      * If `values` is a string

    See Also
    --------
    DataFrame.isin : Equivalent method on DataFrame.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(['lame', 'cow', 'lame', 'beetle', 'lame',
    ...                'hippo'], name='animal')
    >>> s.isin(['cow', 'lame']).execute()
    0     True
    1     True
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool

    Passing a single string as ``s.isin('lame')`` will raise an error. Use
    a list of one element instead:

    >>> s.isin(['lame']).execute()
    0     True
    1    False
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool
    """
    if not is_list_like(values):
        # Non list-like input is only acceptable when it is one of the
        # supported entity types.
        if not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
            raise TypeError(
                "only list-like objects are allowed to be passed to isin(), "
                f"you passed a [{type(values)}]"
            )
    else:
        values = list(values)

    return DataFrameIsin(values=values)(elements)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def df_isin(df, values):
    """
    Whether each element in the DataFrame is contained in values.

    Parameters
    ----------
    values : iterable, Series, DataFrame or dict
        The result will only be true at a location if all the
        labels match. If `values` is a Series, that's the index. If
        `values` is a dict, the keys must be the column names,
        which must match. If `values` is a DataFrame,
        then both the index and column labels must match.

    Returns
    -------
    DataFrame
        DataFrame of booleans showing whether each element in the DataFrame
        is contained in values.

    Raises
    ------
    TypeError
        If `values` is neither list-like, a dict, nor a supported entity.

    See Also
    --------
    DataFrame.eq: Equality test for DataFrame.
    Series.isin: Equivalent method on Series.
    Series.str.contains: Test if pattern or regex is contained within a
        string of a Series or Index.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]},
    ...                   index=['falcon', 'dog'])
    >>> df.execute()
            num_legs  num_wings
    falcon         2          2
    dog            4          0

    When ``values`` is a list check whether every value in the DataFrame
    is present in the list (which animals have 0 or 2 legs or wings)

    >>> df.isin([0, 2]).execute()
            num_legs  num_wings
    falcon      True       True
    dog        False       True

    When ``values`` is a dict, we can pass values to check for each
    column separately:

    >>> df.isin({'num_wings': [0, 3]}).execute()
            num_legs  num_wings
    falcon     False      False
    dog        False       True

    When ``values`` is a Series or DataFrame the index and column must
    match. Note that 'falcon' does not match based on the number of legs
    in ``other``.

    >>> other = md.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]},
    ...                      index=['spider', 'falcon'])
    >>> df.isin(other).execute()
            num_legs  num_wings
    falcon      True       True
    dog        False      False
    """
    if isinstance(values, dict):
        # A dict is passed through untouched so its per-column mapping is kept.
        pass
    elif is_list_like(values):
        values = list(values)
    elif not isinstance(
        values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE)
    ):
        raise TypeError(
            "only list-like objects or dict are allowed to be passed to isin(), "
            f"you passed a [{type(values)}]"
        )

    return DataFrameIsin(values=values)(df)
|