maxframe 0.1.0b5__cp38-cp38-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +2 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import pytest
|
|
18
|
+
|
|
19
|
+
from ...core import IndexValue
|
|
20
|
+
from ...datasource.dataframe import from_pandas
|
|
21
|
+
from .. import DataFrameMerge, concat
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_merge():
    """Check the operator metadata produced by ``merge`` for several key layouts.

    For every keyword combination the resulting operator must be a
    ``DataFrameMerge`` carrying the requested ``how`` (``"inner"`` when not
    given), and the inferred output columns must match what pandas produces
    for the same merge on the raw frames.
    """
    df1 = pd.DataFrame(
        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
    )
    df2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=3)

    # Each entry is a distinct key layout; a repeated entry would add no coverage.
    parameters = [
        {},
        {"how": "left", "right_on": "x", "left_index": True},
        {"how": "right", "left_on": "a", "right_index": True},
        {"how": "left", "left_on": "a", "right_on": "x"},
        {"how": "right", "on": "a"},
        {"how": "inner", "on": ["a", "b"]},
    ]

    for kw in parameters:
        df = mdf1.merge(mdf2, **kw)
        # pandas is the reference: comparing the inferred columns with
        # themselves (as before) could never fail.
        expected = df1.merge(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "inner")
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_merge_invalid_parameters():
    """Verify that malformed bloom-filter arguments to ``merge`` are rejected.

    Each case pairs the exception type expected from ``merge`` with the
    offending keyword arguments.
    """
    raw_left = pd.DataFrame(
        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
    )
    raw_right = pd.DataFrame(
        np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"]
    )

    left = from_pandas(raw_left, chunk_size=2)
    right = from_pandas(raw_right, chunk_size=3)

    invalid_calls = [
        (ValueError, {"bloom_filter": "wrong"}),
        (TypeError, {"bloom_filter_options": "wrong"}),
        (ValueError, {"bloom_filter_options": {"wrong": 1}}),
        (ValueError, {"bloom_filter_options": {"filter": "wrong"}}),
    ]

    for expected_error, bad_kwargs in invalid_calls:
        with pytest.raises(expected_error):
            left.merge(right, **bad_kwargs)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_join():
    """Check the operator metadata produced by index-based ``join``.

    For every ``how`` the resulting operator must be a ``DataFrameMerge``
    carrying that ``how`` (``"left"`` when not given), and the inferred
    output columns must match the pandas join of the raw frames.
    """
    df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]], index=["a1", "a2", "a3"])
    df2 = pd.DataFrame([[1, 2, 3], [1, 5, 6], [7, 8, 9]], index=["a1", "b2", "b3"]) + 1
    # Duplicate the right-hand rows so its index labels are not unique.
    df2 = pd.concat([df2, df2 + 1])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=2)

    # The list used to repeat ``how="left"``; the repeat is replaced by
    # ``"outer"`` so that every join type is exercised once.
    parameters = [
        {"lsuffix": "l_", "rsuffix": "r_"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "left"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "right"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "inner"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "outer"},
    ]

    for kw in parameters:
        df = mdf1.join(mdf2, auto_merge="none", bloom_filter=False, **kw)
        # pandas is the reference: comparing the inferred columns with
        # themselves (as before) could never fail.
        expected = df1.join(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "left")
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_join_on():
    """Check the operator metadata produced by ``join`` with an ``on`` column.

    For every keyword combination the resulting operator must be a
    ``DataFrameMerge`` carrying the requested ``how`` (``"left"`` when not
    given), and the inferred output columns must match the pandas join of
    the raw frames.
    """
    df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]], columns=["a1", "a2", "a3"])
    df2 = (
        pd.DataFrame([[1, 2, 3], [1, 5, 6], [7, 8, 9]], columns=["a1", "b2", "b3"]) + 1
    )
    # Duplicate the right-hand rows so its index labels are not unique.
    df2 = pd.concat([df2, df2 + 1])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=2)

    parameters = [
        {"lsuffix": "l_", "rsuffix": "r_"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "left", "on": "a1"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "right", "on": "a2"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "inner", "on": "a2"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "outer", "on": "a2"},
    ]

    for kw in parameters:
        df = mdf1.join(mdf2, auto_merge="none", bloom_filter=False, **kw)
        # pandas is the reference: comparing the inferred columns with
        # themselves (as before) could never fail.
        expected = df1.join(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "left")
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_append():
    """Check shape and index metadata of ``append`` with and without ``ignore_index``."""
    raw_first = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    raw_second = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))

    # Plain append: the result index is reported as an Int64Index.
    first = from_pandas(raw_first, chunk_size=3)
    second = from_pandas(raw_second, chunk_size=3)
    appended = first.append(second)

    assert appended.shape == (20, 4)
    assert isinstance(appended.index_value.value, IndexValue.Int64Index)

    # ignore_index=True: the result index is a fresh RangeIndex over all rows.
    first = from_pandas(raw_first, chunk_size=3)
    second = from_pandas(raw_second, chunk_size=3)
    renumbered = first.append(second, ignore_index=True)

    assert renumbered.shape == (20, 4)
    assert isinstance(renumbered.index_value.value, IndexValue.RangeIndex)
    pd.testing.assert_index_equal(
        renumbered.index_value.to_pandas(), pd.RangeIndex(20)
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_concat():
    """Check shape, dtypes and index metadata of ``concat`` along both axes."""
    raw_a = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    raw_b = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))

    frame_a = from_pandas(raw_a, chunk_size=4)
    frame_b = from_pandas(raw_b, chunk_size=4)

    # Row-wise concat of two frames with identical columns.
    stacked = concat([frame_a, frame_b], axis="index")
    assert stacked.shape == (20, 4)
    pd.testing.assert_series_equal(stacked.dtypes, raw_a.dtypes)

    # The second frame's RangeIndex continues the first one's, so the
    # combined index is expected to be RangeIndex(20).
    raw_tail = pd.DataFrame(
        np.random.rand(10, 4), columns=list("ABCD"), index=pd.RangeIndex(10, 20)
    )
    frame_tail = from_pandas(raw_tail, chunk_size=4)
    stacked = concat([frame_a, frame_tail], axis="index")

    assert stacked.shape == (20, 4)
    pd.testing.assert_series_equal(stacked.dtypes, raw_a.dtypes)
    pd.testing.assert_index_equal(stacked.index_value.to_pandas(), pd.RangeIndex(20))

    # With a shuffled integer index on either side, the index metadata is
    # expected to be an empty int64 index.
    raw_shuffled = pd.DataFrame(
        np.random.rand(10, 4),
        columns=list("ABCD"),
        index=np.random.permutation(np.arange(10)),
    )
    frame_shuffled = from_pandas(raw_shuffled, chunk_size=4)

    operand_lists = (
        [frame_a, frame_shuffled],
        [frame_shuffled, frame_a],
        [frame_shuffled, frame_shuffled],
    )
    for operands in operand_lists:
        stacked = concat(operands, axis="index")

        assert stacked.shape == (20, 4)
        pd.testing.assert_series_equal(stacked.dtypes, raw_a.dtypes)
        pd.testing.assert_index_equal(
            stacked.index_value.to_pandas(), pd.Index([], dtype=np.int64)
        )

    # Column-wise concat with differing chunk sizes; dtypes follow pandas.
    frame_a = from_pandas(raw_a, chunk_size=3)
    frame_b = from_pandas(raw_b, chunk_size=4)
    side_by_side = concat([frame_a, frame_b], axis="columns")

    assert side_by_side.shape == (10, 8)
    pandas_dtypes = pd.concat([raw_a, raw_b], axis="columns").dtypes
    pd.testing.assert_series_equal(side_by_side.dtypes, pandas_dtypes)

    # join="inner" keeps only the columns shared by both inputs.
    raw_wide = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    raw_narrow = pd.DataFrame(np.random.rand(10, 3), columns=list("ABC"))
    frame_wide = from_pandas(raw_wide, chunk_size=3)
    frame_narrow = from_pandas(raw_narrow, chunk_size=3)
    common = concat([frame_wide, frame_narrow], join="inner")
    assert common.shape == (20, 3)
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .apply import df_apply, series_apply
|
|
16
|
+
from .astype import astype, index_astype
|
|
17
|
+
from .case_when import case_when
|
|
18
|
+
from .check_monotonic import (
|
|
19
|
+
check_monotonic,
|
|
20
|
+
is_monotonic,
|
|
21
|
+
is_monotonic_decreasing,
|
|
22
|
+
is_monotonic_increasing,
|
|
23
|
+
)
|
|
24
|
+
from .cut import cut
|
|
25
|
+
from .describe import describe
|
|
26
|
+
from .diff import df_diff, series_diff
|
|
27
|
+
from .drop import df_drop, df_pop, index_drop, series_drop
|
|
28
|
+
from .drop_duplicates import (
|
|
29
|
+
df_drop_duplicates,
|
|
30
|
+
index_drop_duplicates,
|
|
31
|
+
series_drop_duplicates,
|
|
32
|
+
)
|
|
33
|
+
from .duplicated import df_duplicated, index_duplicated, series_duplicated
|
|
34
|
+
from .eval import df_eval, df_query
|
|
35
|
+
from .explode import df_explode, series_explode
|
|
36
|
+
from .isin import df_isin, series_isin
|
|
37
|
+
from .map import index_map, series_map
|
|
38
|
+
from .melt import melt
|
|
39
|
+
from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
|
|
40
|
+
from .pct_change import pct_change
|
|
41
|
+
from .pivot_table import pivot_table
|
|
42
|
+
from .qcut import qcut
|
|
43
|
+
from .select_dtypes import select_dtypes
|
|
44
|
+
from .shift import shift, tshift
|
|
45
|
+
from .stack import stack
|
|
46
|
+
from .transform import df_transform, series_transform
|
|
47
|
+
from .transpose import transpose
|
|
48
|
+
from .value_counts import value_counts
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _install():
    """Attach this package's functions as methods on the DataFrame, Series and Index types.

    Also registers every handler-backed method on ``StringAccessor`` and
    ``DatetimeAccessor`` (unless the accessor already defines it) and exposes
    the two accessors as ``str`` / ``dt`` on each Series type.
    """
    from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
    from .accessor import CachedAccessor, DatetimeAccessor, StringAccessor
    from .datetimes import _datetime_method_to_handlers
    from .string_ import _string_method_to_handlers

    # Attribute name -> implementation, one table per target family.
    dataframe_attrs = {
        "apply": df_apply,
        "astype": astype,
        "describe": describe,
        # ``del df[items]`` drops the given columns in place.
        "__delitem__": lambda df, items: df_drop(df, items, axis=1, inplace=True),
        "diff": df_diff,
        "drop_duplicates": df_drop_duplicates,
        "duplicated": df_duplicated,
        "drop": df_drop,
        "eval": df_eval,
        "explode": df_explode,
        "isin": df_isin,
        "melt": melt,
        "memory_usage": df_memory_usage,
        "pct_change": pct_change,
        "pivot_table": pivot_table,
        "pop": df_pop,
        "query": df_query,
        "select_dtypes": select_dtypes,
        "shift": shift,
        "stack": stack,
        "transform": df_transform,
        "transpose": transpose,
        "tshift": tshift,
    }
    series_attrs = {
        "apply": series_apply,
        "astype": astype,
        "case_when": case_when,
        "check_monotonic": check_monotonic,
        "describe": describe,
        "diff": series_diff,
        "drop": series_drop,
        "drop_duplicates": series_drop_duplicates,
        "duplicated": series_duplicated,
        "explode": series_explode,
        "is_monotonic": property(fget=is_monotonic),
        "is_monotonic_decreasing": property(fget=is_monotonic_decreasing),
        "is_monotonic_increasing": property(fget=is_monotonic_increasing),
        "isin": series_isin,
        "map": series_map,
        "memory_usage": series_memory_usage,
        "pct_change": pct_change,
        "shift": shift,
        "transform": series_transform,
        "tshift": tshift,
        "value_counts": value_counts,
    }
    index_attrs = {
        "astype": index_astype,
        "check_monotonic": check_monotonic,
        "drop": index_drop,
        "drop_duplicates": index_drop_duplicates,
        "duplicated": index_duplicated,
        "is_monotonic": property(fget=is_monotonic),
        "is_monotonic_increasing": property(fget=is_monotonic_increasing),
        "is_monotonic_decreasing": property(fget=is_monotonic_decreasing),
        "map": index_map,
        "memory_usage": index_memory_usage,
        "value_counts": value_counts,
    }

    for target_types, attrs in (
        (DATAFRAME_TYPE, dataframe_attrs),
        (SERIES_TYPE, series_attrs),
        (INDEX_TYPE, index_attrs),
    ):
        for target in target_types:
            for attr_name, impl in attrs.items():
                setattr(target, attr_name, impl)

    # Generate accessor methods only for names the accessor class does not
    # already define explicitly.
    for accessor_cls, handlers in (
        (StringAccessor, _string_method_to_handlers),
        (DatetimeAccessor, _datetime_method_to_handlers),
    ):
        for method in handlers:
            if not hasattr(accessor_cls, method):
                accessor_cls._register(method)

    for series_type in SERIES_TYPE:
        series_type.str = CachedAccessor("str", StringAccessor)
        series_type.dt = CachedAccessor("dt", DatetimeAccessor)


# Run the registration once at import time, then remove the helper from the
# module namespace.
_install()
del _install
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from pandas.api.types import is_list_like
|
|
17
|
+
|
|
18
|
+
from ...core.operator import MapReduceOperator
|
|
19
|
+
from ...serialization.serializables import AnyField, KeyField, StringField
|
|
20
|
+
from ..operators import DataFrameOperatorMixin
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DuplicateOperand(MapReduceOperator, DataFrameOperatorMixin):
    """Operator base holding the arguments of a duplicate-handling operation.

    Besides the input key it stores ``subset``, ``keep`` (default
    ``"first"``) and ``method`` as serializable fields.
    """

    # Serializable fields; declaration order is kept exactly as before.
    input = KeyField("input")
    subset = AnyField("subset", default=None)
    keep = AnyField("keep", default="first")
    method = StringField("method", default=None)

    def _set_inputs(self, inputs):
        """Store ``inputs`` via the parent class and bind the first one to ``input``."""
        super()._set_inputs(inputs)
        # Read back from ``self._inputs`` (set by the parent call) rather
        # than from the raw ``inputs`` argument.
        self.input = self._inputs[0]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def validate_subset(df, subset):
    """Normalize ``subset`` to a list of column labels present in ``df``.

    Parameters
    ----------
    df
        Object exposing ``dtypes``; membership in ``df.dtypes`` decides
        whether a label is known.
    subset
        ``None``, a single label, or a list-like of labels.

    Returns
    -------
    list or None
        ``None`` when ``subset`` is ``None``; otherwise a new list holding
        the requested labels.

    Raises
    ------
    KeyError
        For the first label missing from ``df.dtypes``; the exception
        argument is a one-element ``pd.Index`` containing that label.
    """
    if subset is None:
        return subset

    # A scalar label is wrapped; any list-like is copied into a plain list.
    labels = list(subset) if is_list_like(subset) else [subset]

    for label in labels:
        if label in df.dtypes:
            continue
        raise KeyError(pd.Index([label]))

    return labels
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from functools import wraps
|
|
16
|
+
from typing import Iterable
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
from pandas.api.types import (
|
|
20
|
+
is_datetime64_dtype,
|
|
21
|
+
is_datetime64tz_dtype,
|
|
22
|
+
is_period_dtype,
|
|
23
|
+
is_timedelta64_dtype,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from ...utils import adapt_docstring
|
|
27
|
+
from .datetimes import SeriesDatetimeMethod, _datetime_method_to_handlers
|
|
28
|
+
from .string_ import SeriesStringMethod, _string_method_to_handlers
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StringAccessor:
    """
    Vectorized string functions for Series and Index.

    NAs stay NA unless handled otherwise by a particular method.
    Patterned after Python's string methods, with some inspiration from
    R's stringr package.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(["A_Str_Series"])
    >>> s.execute()
    0    A_Str_Series
    dtype: object
    >>> s.str.split("_").execute()
    0    [A, Str, Series]
    dtype: object
    >>> s.str.replace("_", "").execute()
    0    AStrSeries
    dtype: object
    """

    def __init__(self, series):
        self._series = series

    @classmethod
    def _gen_func(cls, method):
        """Build an accessor method that defers ``method`` to ``SeriesStringMethod``.

        The returned function mirrors the metadata of the pandas string
        method of the same name and carries its docstring passed through
        ``adapt_docstring``.
        """
        pandas_method = getattr(pd.Series.str, method)

        @wraps(pandas_method)
        def _call(self, *args, **kwargs):
            string_op = SeriesStringMethod(
                method=method, method_args=args, method_kwargs=kwargs
            )
            return string_op(self._series)

        _call.__doc__ = adapt_docstring(pandas_method.__doc__)
        return _call

    def __getitem__(self, item):
        getter = self._gen_func("__getitem__")
        return getter(self, item)

    def __dir__(self) -> Iterable[str]:
        # Advertise handler-backed method names in addition to the regular
        # attributes.
        names = set(super().__dir__())
        names.update(_string_method_to_handlers.keys())
        return list(names)

    @classmethod
    def _register(cls, method):
        """Install the generated wrapper for ``method`` as a class attribute."""
        setattr(cls, method, cls._gen_func(method))

    def split(self, pat=None, n=-1, expand=False):
        r"""
        Split strings around given separator/delimiter.

        Splits the string in the Series/Index from the beginning,
        at the specified delimiter string. Equivalent to :meth:`str.split`.

        Parameters
        ----------
        pat : str, optional
            String or regular expression to split on.
            If not specified, split on whitespace.
        n : int, default -1 (all)
            Limit number of splits in output.
            ``None``, 0 and -1 will be interpreted as return all splits.
        expand : bool, default False
            Expand the split strings into separate columns.

            * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
            * If ``False``, return Series/Index, containing lists of strings.

        Returns
        -------
        Series, Index, DataFrame or MultiIndex
            Type matches caller unless ``expand=True`` (see Notes).

        See Also
        --------
        Series.str.split : Split strings around given separator/delimiter.
        Series.str.rsplit : Splits string around given separator/delimiter,
            starting from the right.
        Series.str.join : Join lists contained as elements in the Series/Index
            with passed delimiter.
        str.split : Standard library version for split.
        str.rsplit : Standard library version for rsplit.

        Notes
        -----
        The handling of the `n` keyword depends on the number of found splits:

        - If found splits > `n`,  make first `n` splits only
        - If found splits <= `n`, make all splits
        - If for a certain row the number of found splits < `n`,
          append `None` for padding up to `n` if ``expand=True``

        If using ``expand=True``, Series and Index callers return DataFrame and
        MultiIndex objects, respectively.

        Examples
        --------
        >>> import numpy as np
        >>> import maxframe.dataframe as md
        >>> s = md.Series(["this is a regular sentence",
        ...                "https://docs.python.org/3/tutorial/index.html",
        ...                np.nan])
        >>> s.execute()
        0                       this is a regular sentence
        1    https://docs.python.org/3/tutorial/index.html
        2                                              NaN
        dtype: object

        In the default setting, the string is split by whitespace.

        >>> s.str.split().execute()
        0                   [this, is, a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        Without the `n` parameter, the outputs of `rsplit` and `split`
        are identical.

        >>> s.str.rsplit().execute()
        0                   [this, is, a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        The `n` parameter can be used to limit the number of splits on the
        delimiter. The outputs of `split` and `rsplit` are different.

        >>> s.str.split(n=2).execute()
        0                     [this, is, a regular sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        >>> s.str.rsplit(n=2).execute()
        0                     [this is a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        The `pat` parameter can be used to split by other characters.

        >>> s.str.split(pat="/").execute()
        0                         [this is a regular sentence]
        1    [https:, , docs.python.org, 3, tutorial, index...
        2                                                  NaN
        dtype: object

        When using ``expand=True``, the split elements will expand out into
        separate columns. If NaN is present, it is propagated throughout
        the columns during the split.

        >>> s.str.split(expand=True).execute()
                                                       0     1     2        3
        0                                           this    is     a  regular
        1  https://docs.python.org/3/tutorial/index.html  None  None     None
        2                                            NaN   NaN   NaN      NaN \
                     4
        0     sentence
        1         None
        2          NaN

        For slightly more complex use cases like splitting the html document name
        from a url, a combination of parameter settings can be used.

        >>> s.str.rsplit("/", n=1, expand=True).execute()
                                            0           1
        0          this is a regular sentence        None
        1  https://docs.python.org/3/tutorial  index.html
        2                                 NaN         NaN

        Remember to escape special characters when explicitly using regular
        expressions.

        >>> s = md.Series(["1+1=2"])
        >>> s.str.split(r"\+|=", expand=True).execute()
           0  1  2
        0  1  1  2
        """
        splitter = self._gen_func("split")
        return splitter(self, pat=pat, n=n, expand=expand)

    def rsplit(self, pat=None, n=-1, expand=False):
        splitter = self._gen_func("rsplit")
        return splitter(self, pat=pat, n=n, expand=expand)

    def cat(self, others=None, sep=None, na_rep=None, join="left"):
        concatenator = self._gen_func("cat")
        return concatenator(self, others=others, sep=sep, na_rep=na_rep, join=join)

    # ``rsplit`` and ``cat`` reuse the pandas documentation, passed through
    # ``adapt_docstring``.
    rsplit.__doc__ = adapt_docstring(pd.Series.str.rsplit.__doc__)
    cat.__doc__ = adapt_docstring(pd.Series.str.cat.__doc__)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class DatetimeAccessor:
    """Accessor exposing pandas ``.dt`` members on a datetime-like series.

    Construction fails with ``AttributeError`` unless the series dtype is
    datetime64 (naive or tz-aware), timedelta64 or period.
    """

    def __init__(self, series):
        # NOTE(review): ``is_datetime64tz_dtype`` and ``is_period_dtype`` appear
        # to be deprecated in recent pandas releases (2.1+) — confirm before
        # raising the supported pandas version.
        datetimelike_checks = (
            is_datetime64_dtype,
            is_datetime64tz_dtype,
            is_timedelta64_dtype,
            is_period_dtype,
        )
        if not any(check(series.dtype) for check in datetimelike_checks):
            raise AttributeError("Can only use .dt accessor with datetimelike values")
        self._series = series

    @classmethod
    def _gen_func(cls, method, is_property):
        """Build an accessor member that defers ``method`` to ``SeriesDatetimeMethod``.

        ``is_property`` is forwarded to the operator unchanged. The returned
        function mirrors the metadata of the pandas ``.dt`` member of the
        same name and carries its docstring passed through
        ``adapt_docstring``.
        """
        pandas_member = getattr(pd.Series.dt, method)

        @wraps(pandas_member)
        def _call(self, *args, **kwargs):
            datetime_op = SeriesDatetimeMethod(
                method=method,
                is_property=is_property,
                method_args=args,
                method_kwargs=kwargs,
            )
            return datetime_op(self._series)

        _call.__doc__ = adapt_docstring(pandas_member.__doc__)
        return _call

    @classmethod
    def _register(cls, method):
        """Install ``method`` on the class, as a property when pandas exposes a non-callable."""
        is_property = not callable(getattr(pd.Series.dt, method))
        member = cls._gen_func(method, is_property)
        setattr(cls, method, property(member) if is_property else member)

    def __dir__(self) -> Iterable[str]:
        # Advertise handler-backed member names in addition to the regular
        # attributes.
        names = set(super().__dir__())
        names.update(_datetime_method_to_handlers.keys())
        return list(names)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class CachedAccessor:
    """Descriptor that builds an accessor once per owning object and caches it.

    Accessed on the class it yields the accessor class itself; accessed on
    an instance it yields ``accessor(instance)``, stored under ``name`` in
    the instance's ``_accessors`` mapping so later lookups reuse it.
    """

    def __init__(self, name: str, accessor) -> None:
        self._name = name
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:
            # Looked up on the class rather than an instance: hand back the
            # accessor class.
            return self._accessor

        cache = obj._accessors
        if self._name not in cache:
            cache[self._name] = self._accessor(obj)
        return cache[self._name]
|