PyPI - maxframe - Versions diffs - 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl - Mend

maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (647)

maxframe/__init__.py +32 -0
maxframe/_utils.cpython-311-darwin.so +0 -0
maxframe/_utils.pxd +33 -0
maxframe/_utils.pyx +547 -0
maxframe/codegen.py +528 -0
maxframe/config/__init__.py +15 -0
maxframe/config/config.py +443 -0
maxframe/config/tests/__init__.py +13 -0
maxframe/config/tests/test_config.py +103 -0
maxframe/config/tests/test_validators.py +34 -0
maxframe/config/validators.py +57 -0
maxframe/conftest.py +139 -0
maxframe/core/__init__.py +65 -0
maxframe/core/base.py +156 -0
maxframe/core/entity/__init__.py +44 -0
maxframe/core/entity/chunks.py +68 -0
maxframe/core/entity/core.py +152 -0
maxframe/core/entity/executable.py +337 -0
maxframe/core/entity/fuse.py +73 -0
maxframe/core/entity/objects.py +100 -0
maxframe/core/entity/output_types.py +90 -0
maxframe/core/entity/tileables.py +438 -0
maxframe/core/entity/utils.py +24 -0
maxframe/core/graph/__init__.py +17 -0
maxframe/core/graph/builder/__init__.py +16 -0
maxframe/core/graph/builder/base.py +86 -0
maxframe/core/graph/builder/chunk.py +430 -0
maxframe/core/graph/builder/tileable.py +34 -0
maxframe/core/graph/builder/utils.py +41 -0
maxframe/core/graph/core.cpython-311-darwin.so +0 -0
maxframe/core/graph/core.pyx +467 -0
maxframe/core/graph/entity.py +171 -0
maxframe/core/graph/tests/__init__.py +13 -0
maxframe/core/graph/tests/test_graph.py +205 -0
maxframe/core/mode.py +96 -0
maxframe/core/operator/__init__.py +34 -0
maxframe/core/operator/base.py +450 -0
maxframe/core/operator/core.py +276 -0
maxframe/core/operator/fetch.py +53 -0
maxframe/core/operator/fuse.py +29 -0
maxframe/core/operator/objects.py +72 -0
maxframe/core/operator/shuffle.py +111 -0
maxframe/core/operator/tests/__init__.py +13 -0
maxframe/core/operator/tests/test_core.py +64 -0
maxframe/core/tests/__init__.py +13 -0
maxframe/core/tests/test_mode.py +75 -0
maxframe/dataframe/__init__.py +81 -0
maxframe/dataframe/arithmetic/__init__.py +359 -0
maxframe/dataframe/arithmetic/abs.py +33 -0
maxframe/dataframe/arithmetic/add.py +60 -0
maxframe/dataframe/arithmetic/arccos.py +28 -0
maxframe/dataframe/arithmetic/arccosh.py +28 -0
maxframe/dataframe/arithmetic/arcsin.py +28 -0
maxframe/dataframe/arithmetic/arcsinh.py +28 -0
maxframe/dataframe/arithmetic/arctan.py +28 -0
maxframe/dataframe/arithmetic/arctanh.py +28 -0
maxframe/dataframe/arithmetic/around.py +152 -0
maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
maxframe/dataframe/arithmetic/ceil.py +28 -0
maxframe/dataframe/arithmetic/core.py +342 -0
maxframe/dataframe/arithmetic/cos.py +28 -0
maxframe/dataframe/arithmetic/cosh.py +28 -0
maxframe/dataframe/arithmetic/degrees.py +28 -0
maxframe/dataframe/arithmetic/docstring.py +442 -0
maxframe/dataframe/arithmetic/equal.py +56 -0
maxframe/dataframe/arithmetic/exp.py +28 -0
maxframe/dataframe/arithmetic/exp2.py +28 -0
maxframe/dataframe/arithmetic/expm1.py +28 -0
maxframe/dataframe/arithmetic/floor.py +28 -0
maxframe/dataframe/arithmetic/floordiv.py +64 -0
maxframe/dataframe/arithmetic/greater.py +57 -0
maxframe/dataframe/arithmetic/greater_equal.py +57 -0
maxframe/dataframe/arithmetic/invert.py +33 -0
maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
maxframe/dataframe/arithmetic/less.py +57 -0
maxframe/dataframe/arithmetic/less_equal.py +57 -0
maxframe/dataframe/arithmetic/log.py +28 -0
maxframe/dataframe/arithmetic/log10.py +28 -0
maxframe/dataframe/arithmetic/log2.py +28 -0
maxframe/dataframe/arithmetic/mod.py +60 -0
maxframe/dataframe/arithmetic/multiply.py +60 -0
maxframe/dataframe/arithmetic/negative.py +33 -0
maxframe/dataframe/arithmetic/not_equal.py +56 -0
maxframe/dataframe/arithmetic/power.py +68 -0
maxframe/dataframe/arithmetic/radians.py +28 -0
maxframe/dataframe/arithmetic/sin.py +28 -0
maxframe/dataframe/arithmetic/sinh.py +28 -0
maxframe/dataframe/arithmetic/sqrt.py +28 -0
maxframe/dataframe/arithmetic/subtract.py +64 -0
maxframe/dataframe/arithmetic/tan.py +28 -0
maxframe/dataframe/arithmetic/tanh.py +28 -0
maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
maxframe/dataframe/arithmetic/truediv.py +64 -0
maxframe/dataframe/arithmetic/trunc.py +28 -0
maxframe/dataframe/arrays.py +864 -0
maxframe/dataframe/core.py +2417 -0
maxframe/dataframe/datasource/__init__.py +15 -0
maxframe/dataframe/datasource/core.py +81 -0
maxframe/dataframe/datasource/dataframe.py +59 -0
maxframe/dataframe/datasource/date_range.py +504 -0
maxframe/dataframe/datasource/from_index.py +54 -0
maxframe/dataframe/datasource/from_records.py +107 -0
maxframe/dataframe/datasource/from_tensor.py +419 -0
maxframe/dataframe/datasource/index.py +117 -0
maxframe/dataframe/datasource/read_csv.py +528 -0
maxframe/dataframe/datasource/read_odps_query.py +299 -0
maxframe/dataframe/datasource/read_odps_table.py +253 -0
maxframe/dataframe/datasource/read_parquet.py +421 -0
maxframe/dataframe/datasource/series.py +55 -0
maxframe/dataframe/datasource/tests/__init__.py +13 -0
maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
maxframe/dataframe/datastore/__init__.py +26 -0
maxframe/dataframe/datastore/core.py +19 -0
maxframe/dataframe/datastore/to_csv.py +227 -0
maxframe/dataframe/datastore/to_odps.py +162 -0
maxframe/dataframe/extensions/__init__.py +41 -0
maxframe/dataframe/extensions/accessor.py +50 -0
maxframe/dataframe/extensions/reshuffle.py +83 -0
maxframe/dataframe/extensions/tests/__init__.py +13 -0
maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
maxframe/dataframe/fetch/__init__.py +15 -0
maxframe/dataframe/fetch/core.py +86 -0
maxframe/dataframe/groupby/__init__.py +82 -0
maxframe/dataframe/groupby/aggregation.py +350 -0
maxframe/dataframe/groupby/apply.py +251 -0
maxframe/dataframe/groupby/core.py +179 -0
maxframe/dataframe/groupby/cum.py +124 -0
maxframe/dataframe/groupby/fill.py +141 -0
maxframe/dataframe/groupby/getitem.py +92 -0
maxframe/dataframe/groupby/head.py +105 -0
maxframe/dataframe/groupby/sample.py +214 -0
maxframe/dataframe/groupby/tests/__init__.py +13 -0
maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
maxframe/dataframe/groupby/transform.py +255 -0
maxframe/dataframe/indexing/__init__.py +84 -0
maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
maxframe/dataframe/indexing/align.py +349 -0
maxframe/dataframe/indexing/at.py +83 -0
maxframe/dataframe/indexing/getitem.py +204 -0
maxframe/dataframe/indexing/iat.py +37 -0
maxframe/dataframe/indexing/iloc.py +566 -0
maxframe/dataframe/indexing/insert.py +86 -0
maxframe/dataframe/indexing/loc.py +411 -0
maxframe/dataframe/indexing/reindex.py +526 -0
maxframe/dataframe/indexing/rename.py +462 -0
maxframe/dataframe/indexing/rename_axis.py +209 -0
maxframe/dataframe/indexing/reset_index.py +402 -0
maxframe/dataframe/indexing/sample.py +221 -0
maxframe/dataframe/indexing/set_axis.py +194 -0
maxframe/dataframe/indexing/set_index.py +61 -0
maxframe/dataframe/indexing/setitem.py +130 -0
maxframe/dataframe/indexing/tests/__init__.py +13 -0
maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
maxframe/dataframe/indexing/where.py +308 -0
maxframe/dataframe/initializer.py +288 -0
maxframe/dataframe/merge/__init__.py +32 -0
maxframe/dataframe/merge/append.py +121 -0
maxframe/dataframe/merge/concat.py +325 -0
maxframe/dataframe/merge/merge.py +593 -0
maxframe/dataframe/merge/tests/__init__.py +13 -0
maxframe/dataframe/merge/tests/test_merge.py +215 -0
maxframe/dataframe/misc/__init__.py +134 -0
maxframe/dataframe/misc/_duplicate.py +46 -0
maxframe/dataframe/misc/accessor.py +276 -0
maxframe/dataframe/misc/apply.py +692 -0
maxframe/dataframe/misc/astype.py +236 -0
maxframe/dataframe/misc/case_when.py +141 -0
maxframe/dataframe/misc/check_monotonic.py +84 -0
maxframe/dataframe/misc/cut.py +383 -0
maxframe/dataframe/misc/datetimes.py +79 -0
maxframe/dataframe/misc/describe.py +108 -0
maxframe/dataframe/misc/diff.py +210 -0
maxframe/dataframe/misc/drop.py +440 -0
maxframe/dataframe/misc/drop_duplicates.py +248 -0
maxframe/dataframe/misc/duplicated.py +292 -0
maxframe/dataframe/misc/eval.py +728 -0
maxframe/dataframe/misc/explode.py +171 -0
maxframe/dataframe/misc/get_dummies.py +208 -0
maxframe/dataframe/misc/isin.py +217 -0
maxframe/dataframe/misc/map.py +236 -0
maxframe/dataframe/misc/melt.py +162 -0
maxframe/dataframe/misc/memory_usage.py +248 -0
maxframe/dataframe/misc/pct_change.py +150 -0
maxframe/dataframe/misc/pivot_table.py +262 -0
maxframe/dataframe/misc/qcut.py +104 -0
maxframe/dataframe/misc/select_dtypes.py +104 -0
maxframe/dataframe/misc/shift.py +256 -0
maxframe/dataframe/misc/stack.py +238 -0
maxframe/dataframe/misc/string_.py +221 -0
maxframe/dataframe/misc/tests/__init__.py +13 -0
maxframe/dataframe/misc/tests/test_misc.py +468 -0
maxframe/dataframe/misc/to_numeric.py +178 -0
maxframe/dataframe/misc/transform.py +361 -0
maxframe/dataframe/misc/transpose.py +136 -0
maxframe/dataframe/misc/value_counts.py +182 -0
maxframe/dataframe/missing/__init__.py +53 -0
maxframe/dataframe/missing/checkna.py +223 -0
maxframe/dataframe/missing/dropna.py +280 -0
maxframe/dataframe/missing/fillna.py +275 -0
maxframe/dataframe/missing/replace.py +439 -0
maxframe/dataframe/missing/tests/__init__.py +13 -0
maxframe/dataframe/missing/tests/test_missing.py +89 -0
maxframe/dataframe/operators.py +273 -0
maxframe/dataframe/plotting/__init__.py +40 -0
maxframe/dataframe/plotting/core.py +78 -0
maxframe/dataframe/plotting/tests/__init__.py +13 -0
maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
maxframe/dataframe/reduction/__init__.py +107 -0
maxframe/dataframe/reduction/aggregation.py +344 -0
maxframe/dataframe/reduction/all.py +78 -0
maxframe/dataframe/reduction/any.py +78 -0
maxframe/dataframe/reduction/core.py +837 -0
maxframe/dataframe/reduction/count.py +59 -0
maxframe/dataframe/reduction/cummax.py +30 -0
maxframe/dataframe/reduction/cummin.py +30 -0
maxframe/dataframe/reduction/cumprod.py +30 -0
maxframe/dataframe/reduction/cumsum.py +30 -0
maxframe/dataframe/reduction/custom_reduction.py +42 -0
maxframe/dataframe/reduction/kurtosis.py +104 -0
maxframe/dataframe/reduction/max.py +65 -0
maxframe/dataframe/reduction/mean.py +61 -0
maxframe/dataframe/reduction/min.py +65 -0
maxframe/dataframe/reduction/nunique.py +141 -0
maxframe/dataframe/reduction/prod.py +76 -0
maxframe/dataframe/reduction/reduction_size.py +36 -0
maxframe/dataframe/reduction/sem.py +69 -0
maxframe/dataframe/reduction/skew.py +89 -0
maxframe/dataframe/reduction/std.py +53 -0
maxframe/dataframe/reduction/str_concat.py +48 -0
maxframe/dataframe/reduction/sum.py +77 -0
maxframe/dataframe/reduction/tests/__init__.py +13 -0
maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
maxframe/dataframe/reduction/unique.py +90 -0
maxframe/dataframe/reduction/var.py +72 -0
maxframe/dataframe/sort/__init__.py +34 -0
maxframe/dataframe/sort/core.py +36 -0
maxframe/dataframe/sort/sort_index.py +153 -0
maxframe/dataframe/sort/sort_values.py +311 -0
maxframe/dataframe/sort/tests/__init__.py +13 -0
maxframe/dataframe/sort/tests/test_sort.py +81 -0
maxframe/dataframe/statistics/__init__.py +33 -0
maxframe/dataframe/statistics/corr.py +280 -0
maxframe/dataframe/statistics/quantile.py +341 -0
maxframe/dataframe/statistics/tests/__init__.py +13 -0
maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
maxframe/dataframe/tests/__init__.py +13 -0
maxframe/dataframe/tests/test_initializer.py +29 -0
maxframe/dataframe/tseries/__init__.py +13 -0
maxframe/dataframe/tseries/tests/__init__.py +13 -0
maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
maxframe/dataframe/tseries/to_datetime.py +297 -0
maxframe/dataframe/ufunc/__init__.py +27 -0
maxframe/dataframe/ufunc/tensor.py +54 -0
maxframe/dataframe/ufunc/ufunc.py +52 -0
maxframe/dataframe/utils.py +1267 -0
maxframe/dataframe/window/__init__.py +29 -0
maxframe/dataframe/window/aggregation.py +96 -0
maxframe/dataframe/window/core.py +69 -0
maxframe/dataframe/window/ewm.py +249 -0
maxframe/dataframe/window/expanding.py +147 -0
maxframe/dataframe/window/rolling.py +376 -0
maxframe/dataframe/window/tests/__init__.py +13 -0
maxframe/dataframe/window/tests/test_ewm.py +70 -0
maxframe/dataframe/window/tests/test_expanding.py +66 -0
maxframe/dataframe/window/tests/test_rolling.py +57 -0
maxframe/env.py +33 -0
maxframe/errors.py +21 -0
maxframe/extension.py +81 -0
maxframe/learn/__init__.py +17 -0
maxframe/learn/contrib/__init__.py +17 -0
maxframe/learn/contrib/pytorch/__init__.py +16 -0
maxframe/learn/contrib/pytorch/run_function.py +110 -0
maxframe/learn/contrib/pytorch/run_script.py +102 -0
maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
maxframe/learn/contrib/utils.py +52 -0
maxframe/learn/contrib/xgboost/__init__.py +26 -0
maxframe/learn/contrib/xgboost/classifier.py +86 -0
maxframe/learn/contrib/xgboost/core.py +156 -0
maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
maxframe/learn/contrib/xgboost/predict.py +138 -0
maxframe/learn/contrib/xgboost/regressor.py +78 -0
maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
maxframe/learn/contrib/xgboost/train.py +121 -0
maxframe/learn/utils/__init__.py +15 -0
maxframe/learn/utils/core.py +29 -0
maxframe/lib/__init__.py +15 -0
maxframe/lib/aio/__init__.py +27 -0
maxframe/lib/aio/_runners.py +162 -0
maxframe/lib/aio/_threads.py +35 -0
maxframe/lib/aio/base.py +82 -0
maxframe/lib/aio/file.py +85 -0
maxframe/lib/aio/isolation.py +100 -0
maxframe/lib/aio/lru.py +242 -0
maxframe/lib/aio/parallelism.py +37 -0
maxframe/lib/aio/tests/__init__.py +13 -0
maxframe/lib/aio/tests/test_aio_file.py +55 -0
maxframe/lib/compression.py +55 -0
maxframe/lib/cython/__init__.py +13 -0
maxframe/lib/cython/libcpp.pxd +30 -0
maxframe/lib/filesystem/__init__.py +21 -0
maxframe/lib/filesystem/_glob.py +173 -0
maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
maxframe/lib/filesystem/_oss_lib/common.py +198 -0
maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
maxframe/lib/filesystem/arrow.py +236 -0
maxframe/lib/filesystem/base.py +263 -0
maxframe/lib/filesystem/core.py +95 -0
maxframe/lib/filesystem/fsmap.py +164 -0
maxframe/lib/filesystem/hdfs.py +31 -0
maxframe/lib/filesystem/local.py +112 -0
maxframe/lib/filesystem/oss.py +157 -0
maxframe/lib/filesystem/tests/__init__.py +13 -0
maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
maxframe/lib/filesystem/tests/test_oss.py +182 -0
maxframe/lib/functools_compat.py +81 -0
maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
maxframe/lib/sparse/__init__.py +861 -0
maxframe/lib/sparse/array.py +1604 -0
maxframe/lib/sparse/core.py +92 -0
maxframe/lib/sparse/matrix.py +241 -0
maxframe/lib/sparse/tests/__init__.py +15 -0
maxframe/lib/sparse/tests/test_sparse.py +476 -0
maxframe/lib/sparse/vector.py +150 -0
maxframe/lib/tblib/LICENSE +20 -0
maxframe/lib/tblib/__init__.py +327 -0
maxframe/lib/tblib/cpython.py +83 -0
maxframe/lib/tblib/decorators.py +44 -0
maxframe/lib/tblib/pickling_support.py +90 -0
maxframe/lib/tests/__init__.py +13 -0
maxframe/lib/tests/test_wrapped_pickle.py +51 -0
maxframe/lib/version.py +620 -0
maxframe/lib/wrapped_pickle.py +139 -0
maxframe/mixin.py +100 -0
maxframe/odpsio/__init__.py +21 -0
maxframe/odpsio/arrow.py +91 -0
maxframe/odpsio/schema.py +364 -0
maxframe/odpsio/tableio.py +322 -0
maxframe/odpsio/tests/__init__.py +13 -0
maxframe/odpsio/tests/test_arrow.py +88 -0
maxframe/odpsio/tests/test_schema.py +297 -0
maxframe/odpsio/tests/test_tableio.py +136 -0
maxframe/odpsio/tests/test_volumeio.py +90 -0
maxframe/odpsio/volumeio.py +95 -0
maxframe/opcodes.py +590 -0
maxframe/protocol.py +415 -0
maxframe/remote/__init__.py +18 -0
maxframe/remote/core.py +210 -0
maxframe/remote/run_script.py +121 -0
maxframe/serialization/__init__.py +26 -0
maxframe/serialization/arrow.py +95 -0
maxframe/serialization/core.cpython-311-darwin.so +0 -0
maxframe/serialization/core.pxd +44 -0
maxframe/serialization/core.pyi +61 -0
maxframe/serialization/core.pyx +1094 -0
maxframe/serialization/exception.py +86 -0
maxframe/serialization/maxframe_objects.py +39 -0
maxframe/serialization/numpy.py +91 -0
maxframe/serialization/pandas.py +202 -0
maxframe/serialization/scipy.py +71 -0
maxframe/serialization/serializables/__init__.py +55 -0
maxframe/serialization/serializables/core.py +262 -0
maxframe/serialization/serializables/field.py +624 -0
maxframe/serialization/serializables/field_type.py +589 -0
maxframe/serialization/serializables/tests/__init__.py +13 -0
maxframe/serialization/serializables/tests/test_field_type.py +121 -0
maxframe/serialization/serializables/tests/test_serializable.py +250 -0
maxframe/serialization/tests/__init__.py +13 -0
maxframe/serialization/tests/test_serial.py +412 -0
maxframe/session.py +1310 -0
maxframe/tensor/__init__.py +183 -0
maxframe/tensor/arithmetic/__init__.py +315 -0
maxframe/tensor/arithmetic/abs.py +68 -0
maxframe/tensor/arithmetic/absolute.py +68 -0
maxframe/tensor/arithmetic/add.py +82 -0
maxframe/tensor/arithmetic/angle.py +72 -0
maxframe/tensor/arithmetic/arccos.py +104 -0
maxframe/tensor/arithmetic/arccosh.py +91 -0
maxframe/tensor/arithmetic/arcsin.py +94 -0
maxframe/tensor/arithmetic/arcsinh.py +86 -0
maxframe/tensor/arithmetic/arctan.py +106 -0
maxframe/tensor/arithmetic/arctan2.py +128 -0
maxframe/tensor/arithmetic/arctanh.py +86 -0
maxframe/tensor/arithmetic/around.py +114 -0
maxframe/tensor/arithmetic/bitand.py +95 -0
maxframe/tensor/arithmetic/bitor.py +102 -0
maxframe/tensor/arithmetic/bitxor.py +95 -0
maxframe/tensor/arithmetic/cbrt.py +66 -0
maxframe/tensor/arithmetic/ceil.py +71 -0
maxframe/tensor/arithmetic/clip.py +165 -0
maxframe/tensor/arithmetic/conj.py +74 -0
maxframe/tensor/arithmetic/copysign.py +78 -0
maxframe/tensor/arithmetic/core.py +544 -0
maxframe/tensor/arithmetic/cos.py +85 -0
maxframe/tensor/arithmetic/cosh.py +72 -0
maxframe/tensor/arithmetic/deg2rad.py +72 -0
maxframe/tensor/arithmetic/degrees.py +77 -0
maxframe/tensor/arithmetic/divide.py +114 -0
maxframe/tensor/arithmetic/equal.py +76 -0
maxframe/tensor/arithmetic/exp.py +106 -0
maxframe/tensor/arithmetic/exp2.py +67 -0
maxframe/tensor/arithmetic/expm1.py +79 -0
maxframe/tensor/arithmetic/fabs.py +74 -0
maxframe/tensor/arithmetic/fix.py +69 -0
maxframe/tensor/arithmetic/float_power.py +103 -0
maxframe/tensor/arithmetic/floor.py +77 -0
maxframe/tensor/arithmetic/floordiv.py +94 -0
maxframe/tensor/arithmetic/fmax.py +105 -0
maxframe/tensor/arithmetic/fmin.py +106 -0
maxframe/tensor/arithmetic/fmod.py +99 -0
maxframe/tensor/arithmetic/frexp.py +92 -0
maxframe/tensor/arithmetic/greater.py +77 -0
maxframe/tensor/arithmetic/greater_equal.py +69 -0
maxframe/tensor/arithmetic/hypot.py +77 -0
maxframe/tensor/arithmetic/i0.py +89 -0
maxframe/tensor/arithmetic/imag.py +67 -0
maxframe/tensor/arithmetic/invert.py +110 -0
maxframe/tensor/arithmetic/isclose.py +115 -0
maxframe/tensor/arithmetic/iscomplex.py +64 -0
maxframe/tensor/arithmetic/isfinite.py +106 -0
maxframe/tensor/arithmetic/isinf.py +103 -0
maxframe/tensor/arithmetic/isnan.py +82 -0
maxframe/tensor/arithmetic/isreal.py +63 -0
maxframe/tensor/arithmetic/ldexp.py +99 -0
maxframe/tensor/arithmetic/less.py +69 -0
maxframe/tensor/arithmetic/less_equal.py +69 -0
maxframe/tensor/arithmetic/log.py +92 -0
maxframe/tensor/arithmetic/log10.py +85 -0
maxframe/tensor/arithmetic/log1p.py +95 -0
maxframe/tensor/arithmetic/log2.py +85 -0
maxframe/tensor/arithmetic/logaddexp.py +80 -0
maxframe/tensor/arithmetic/logaddexp2.py +78 -0
maxframe/tensor/arithmetic/logical_and.py +81 -0
maxframe/tensor/arithmetic/logical_not.py +74 -0
maxframe/tensor/arithmetic/logical_or.py +82 -0
maxframe/tensor/arithmetic/logical_xor.py +88 -0
maxframe/tensor/arithmetic/lshift.py +82 -0
maxframe/tensor/arithmetic/maximum.py +108 -0
maxframe/tensor/arithmetic/minimum.py +108 -0
maxframe/tensor/arithmetic/mod.py +104 -0
maxframe/tensor/arithmetic/modf.py +83 -0
maxframe/tensor/arithmetic/multiply.py +81 -0
maxframe/tensor/arithmetic/nan_to_num.py +99 -0
maxframe/tensor/arithmetic/negative.py +65 -0
maxframe/tensor/arithmetic/nextafter.py +68 -0
maxframe/tensor/arithmetic/not_equal.py +72 -0
maxframe/tensor/arithmetic/positive.py +47 -0
maxframe/tensor/arithmetic/power.py +106 -0
maxframe/tensor/arithmetic/rad2deg.py +71 -0
maxframe/tensor/arithmetic/radians.py +77 -0
maxframe/tensor/arithmetic/real.py +70 -0
maxframe/tensor/arithmetic/reciprocal.py +76 -0
maxframe/tensor/arithmetic/rint.py +68 -0
maxframe/tensor/arithmetic/rshift.py +81 -0
maxframe/tensor/arithmetic/setimag.py +29 -0
maxframe/tensor/arithmetic/setreal.py +29 -0
maxframe/tensor/arithmetic/sign.py +81 -0
maxframe/tensor/arithmetic/signbit.py +65 -0
maxframe/tensor/arithmetic/sin.py +98 -0
maxframe/tensor/arithmetic/sinc.py +102 -0
maxframe/tensor/arithmetic/sinh.py +93 -0
maxframe/tensor/arithmetic/spacing.py +72 -0
maxframe/tensor/arithmetic/sqrt.py +81 -0
maxframe/tensor/arithmetic/square.py +69 -0
maxframe/tensor/arithmetic/subtract.py +81 -0
maxframe/tensor/arithmetic/tan.py +88 -0
maxframe/tensor/arithmetic/tanh.py +92 -0
maxframe/tensor/arithmetic/tests/__init__.py +15 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
maxframe/tensor/arithmetic/truediv.py +104 -0
maxframe/tensor/arithmetic/trunc.py +72 -0
maxframe/tensor/arithmetic/utils.py +65 -0
maxframe/tensor/array_utils.py +186 -0
maxframe/tensor/base/__init__.py +34 -0
maxframe/tensor/base/astype.py +119 -0
maxframe/tensor/base/atleast_1d.py +74 -0
maxframe/tensor/base/broadcast_to.py +89 -0
maxframe/tensor/base/ravel.py +92 -0
maxframe/tensor/base/tests/__init__.py +13 -0
maxframe/tensor/base/tests/test_base.py +114 -0
maxframe/tensor/base/transpose.py +125 -0
maxframe/tensor/base/unique.py +205 -0
maxframe/tensor/base/where.py +127 -0
maxframe/tensor/core.py +724 -0
maxframe/tensor/datasource/__init__.py +32 -0
maxframe/tensor/datasource/arange.py +156 -0
maxframe/tensor/datasource/array.py +415 -0
maxframe/tensor/datasource/core.py +109 -0
maxframe/tensor/datasource/empty.py +169 -0
maxframe/tensor/datasource/from_dataframe.py +70 -0
maxframe/tensor/datasource/from_dense.py +54 -0
maxframe/tensor/datasource/from_sparse.py +47 -0
maxframe/tensor/datasource/full.py +186 -0
maxframe/tensor/datasource/ones.py +173 -0
maxframe/tensor/datasource/scalar.py +40 -0
maxframe/tensor/datasource/tests/__init__.py +13 -0
maxframe/tensor/datasource/tests/test_datasource.py +278 -0
maxframe/tensor/datasource/zeros.py +188 -0
maxframe/tensor/fetch/__init__.py +15 -0
maxframe/tensor/fetch/core.py +54 -0
maxframe/tensor/indexing/__init__.py +47 -0
maxframe/tensor/indexing/choose.py +196 -0
maxframe/tensor/indexing/compress.py +124 -0
maxframe/tensor/indexing/core.py +190 -0
maxframe/tensor/indexing/extract.py +71 -0
maxframe/tensor/indexing/fill_diagonal.py +183 -0
maxframe/tensor/indexing/flatnonzero.py +60 -0
maxframe/tensor/indexing/getitem.py +175 -0
maxframe/tensor/indexing/nonzero.py +120 -0
maxframe/tensor/indexing/setitem.py +132 -0
maxframe/tensor/indexing/slice.py +29 -0
maxframe/tensor/indexing/take.py +130 -0
maxframe/tensor/indexing/tests/__init__.py +15 -0
maxframe/tensor/indexing/tests/test_indexing.py +234 -0
maxframe/tensor/indexing/unravel_index.py +103 -0
maxframe/tensor/merge/__init__.py +15 -0
maxframe/tensor/merge/stack.py +132 -0
maxframe/tensor/merge/tests/__init__.py +13 -0
maxframe/tensor/merge/tests/test_merge.py +52 -0
maxframe/tensor/operators.py +123 -0
maxframe/tensor/random/__init__.py +168 -0
maxframe/tensor/random/beta.py +87 -0
maxframe/tensor/random/binomial.py +137 -0
maxframe/tensor/random/bytes.py +39 -0
maxframe/tensor/random/chisquare.py +110 -0
maxframe/tensor/random/choice.py +186 -0
maxframe/tensor/random/core.py +234 -0
maxframe/tensor/random/dirichlet.py +123 -0
maxframe/tensor/random/exponential.py +94 -0
maxframe/tensor/random/f.py +135 -0
maxframe/tensor/random/gamma.py +128 -0
maxframe/tensor/random/geometric.py +93 -0
maxframe/tensor/random/gumbel.py +167 -0
maxframe/tensor/random/hypergeometric.py +148 -0
maxframe/tensor/random/laplace.py +133 -0
maxframe/tensor/random/logistic.py +129 -0
maxframe/tensor/random/lognormal.py +159 -0
maxframe/tensor/random/logseries.py +122 -0
maxframe/tensor/random/multinomial.py +133 -0
maxframe/tensor/random/multivariate_normal.py +192 -0
maxframe/tensor/random/negative_binomial.py +125 -0
maxframe/tensor/random/noncentral_chisquare.py +132 -0
maxframe/tensor/random/noncentral_f.py +126 -0
maxframe/tensor/random/normal.py +143 -0
maxframe/tensor/random/pareto.py +140 -0
maxframe/tensor/random/permutation.py +104 -0
maxframe/tensor/random/poisson.py +111 -0
maxframe/tensor/random/power.py +142 -0
maxframe/tensor/random/rand.py +82 -0
maxframe/tensor/random/randint.py +121 -0
maxframe/tensor/random/randn.py +96 -0
maxframe/tensor/random/random_integers.py +123 -0
maxframe/tensor/random/random_sample.py +86 -0
maxframe/tensor/random/rayleigh.py +110 -0
maxframe/tensor/random/shuffle.py +61 -0
maxframe/tensor/random/standard_cauchy.py +105 -0
maxframe/tensor/random/standard_exponential.py +72 -0
maxframe/tensor/random/standard_gamma.py +120 -0
maxframe/tensor/random/standard_normal.py +74 -0
maxframe/tensor/random/standard_t.py +135 -0
maxframe/tensor/random/tests/__init__.py +15 -0
maxframe/tensor/random/tests/test_random.py +167 -0
maxframe/tensor/random/triangular.py +119 -0
maxframe/tensor/random/uniform.py +131 -0
maxframe/tensor/random/vonmises.py +131 -0
maxframe/tensor/random/wald.py +114 -0
maxframe/tensor/random/weibull.py +140 -0
maxframe/tensor/random/zipf.py +122 -0
maxframe/tensor/rechunk/__init__.py +26 -0
maxframe/tensor/rechunk/rechunk.py +43 -0
maxframe/tensor/reduction/__init__.py +66 -0
maxframe/tensor/reduction/all.py +103 -0
maxframe/tensor/reduction/allclose.py +88 -0
maxframe/tensor/reduction/any.py +105 -0
maxframe/tensor/reduction/argmax.py +103 -0
maxframe/tensor/reduction/argmin.py +103 -0
maxframe/tensor/reduction/array_equal.py +64 -0
maxframe/tensor/reduction/core.py +168 -0
maxframe/tensor/reduction/count_nonzero.py +81 -0
maxframe/tensor/reduction/cumprod.py +97 -0
maxframe/tensor/reduction/cumsum.py +101 -0
maxframe/tensor/reduction/max.py +120 -0
maxframe/tensor/reduction/mean.py +123 -0
maxframe/tensor/reduction/min.py +120 -0
maxframe/tensor/reduction/nanargmax.py +82 -0
maxframe/tensor/reduction/nanargmin.py +76 -0
maxframe/tensor/reduction/nancumprod.py +91 -0
maxframe/tensor/reduction/nancumsum.py +94 -0
maxframe/tensor/reduction/nanmax.py +111 -0
maxframe/tensor/reduction/nanmean.py +106 -0
maxframe/tensor/reduction/nanmin.py +111 -0
maxframe/tensor/reduction/nanprod.py +94 -0
maxframe/tensor/reduction/nanstd.py +126 -0
maxframe/tensor/reduction/nansum.py +115 -0
maxframe/tensor/reduction/nanvar.py +149 -0
maxframe/tensor/reduction/prod.py +130 -0
maxframe/tensor/reduction/std.py +134 -0
maxframe/tensor/reduction/sum.py +125 -0
maxframe/tensor/reduction/tests/__init__.py +13 -0
maxframe/tensor/reduction/tests/test_reduction.py +181 -0
maxframe/tensor/reduction/var.py +176 -0
maxframe/tensor/reshape/__init__.py +17 -0
maxframe/tensor/reshape/reshape.py +188 -0
maxframe/tensor/reshape/tests/__init__.py +15 -0
maxframe/tensor/reshape/tests/test_reshape.py +37 -0
maxframe/tensor/statistics/__init__.py +13 -0
maxframe/tensor/statistics/percentile.py +175 -0
maxframe/tensor/statistics/quantile.py +288 -0
maxframe/tensor/ufunc/__init__.py +26 -0
maxframe/tensor/ufunc/ufunc.py +200 -0
maxframe/tensor/utils.py +718 -0
maxframe/tests/__init__.py +13 -0
maxframe/tests/test_codegen.py +69 -0
maxframe/tests/test_protocol.py +144 -0
maxframe/tests/test_utils.py +376 -0
maxframe/tests/utils.py +164 -0
maxframe/typing_.py +37 -0
maxframe/udf.py +134 -0
maxframe/utils.py +1114 -0
maxframe-0.1.0b5.dist-info/METADATA +104 -0
maxframe-0.1.0b5.dist-info/RECORD +647 -0
maxframe-0.1.0b5.dist-info/WHEEL +5 -0
maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
maxframe_client/__init__.py +17 -0
maxframe_client/clients/__init__.py +13 -0
maxframe_client/clients/framedriver.py +118 -0
maxframe_client/clients/spe.py +104 -0
maxframe_client/conftest.py +15 -0
maxframe_client/fetcher.py +264 -0
maxframe_client/session/__init__.py +22 -0
maxframe_client/session/consts.py +36 -0
maxframe_client/session/graph.py +119 -0
maxframe_client/session/odps.py +482 -0
maxframe_client/session/task.py +280 -0
maxframe_client/session/tests/__init__.py +13 -0
maxframe_client/session/tests/test_task.py +85 -0
maxframe_client/tests/__init__.py +13 -0
maxframe_client/tests/test_fetcher.py +89 -0
maxframe_client/tests/test_session.py +255 -0

maxframe/dataframe/utils.py ADDED

@@ -0,0 +1,1267 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import inspect
+import itertools
+import logging
+import operator
+import sys
+from contextlib import contextmanager
+from numbers import Integral
+from typing import Any, Callable, List
+import numpy as np
+import pandas as pd
+from pandas.api.extensions import ExtensionDtype
+from pandas.api.types import is_string_dtype
+from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.inference import is_dict_like, is_list_like
+from ..core import Entity, ExecutableTuple
+from ..lib.mmh3 import hash as mmh_hash
+from ..utils import (
+    ModulePlaceholder,
+    is_full_slice,
+    lazy_import,
+    parse_version,
+    sbytes,
+    tokenize,
+)
+# pyarrow is optional: bind ``pa`` to a placeholder object when the import fails
+try:
+    import pyarrow as pa
+except ImportError:  # pragma: no cover
+    pa = ModulePlaceholder("pyarrow")
+# optional backends resolved through ``lazy_import``
+cudf = lazy_import("cudf", rename="cudf")
+vineyard = lazy_import("vineyard")
+# ray is optional: record its release tuple and whether it is 2.0 or newer;
+# both names are None when ray cannot be imported
+try:
+    import ray
+    ray_release_version = parse_version(ray.__version__).release
+    ray_deprecate_ml_dataset = ray_release_version[:2] >= (2, 0)
+except ImportError:
+    ray_release_version = None
+    ray_deprecate_ml_dataset = None
+logger = logging.getLogger(__name__)
+def hash_index(index, size):
+    def func(x, size):
+        return mmh_hash(sbytes(x)) % size
+    f = functools.partial(func, size=size)
+    idx_to_grouped = index.groupby(index.map(f))
+    return [idx_to_grouped.get(i, list()) for i in range(size)]
+def hash_dataframe_on(df, on, size, level=None):
+    if on is None:
+        idx = df.index
+        if level is not None:
+            idx = idx.to_frame(False)[level]
+        if cudf and isinstance(idx, cudf.Index):  # pragma: no cover
+            idx = idx.to_pandas()
+        hashed_label = pd.util.hash_pandas_object(idx, categorize=False)
+    elif callable(on):
+        # todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
+        hashed_label = pd.util.hash_pandas_object(df.index.map(on), categorize=False)
+    else:
+        if isinstance(on, list):
+            to_concat = []
+            for v in on:
+                if isinstance(v, pd.Series):
+                    to_concat.append(v)
+                else:
+                    to_concat.append(df[v])
+            data = pd.concat(to_concat, axis=1)
+        else:
+            data = df[on]
+        hashed_label = pd.util.hash_pandas_object(data, index=False, categorize=False)
+    idx_to_grouped = pd.RangeIndex(0, len(hashed_label)).groupby(hashed_label % size)
+    return [idx_to_grouped.get(i, pd.Index([])) for i in range(size)]
+def hash_dtypes(dtypes, size):
+    hashed_indexes = hash_index(dtypes.index, size)
+    return [dtypes[index] for index in hashed_indexes]
+def sort_dataframe_inplace(df, *axis):
+    for ax in axis:
+        df.sort_index(axis=ax, inplace=True)
+    return df
+@functools.lru_cache(1)
+def _get_range_index_type():
+    if cudf is not None:
+        return pd.RangeIndex, cudf.RangeIndex
+    else:
+        return pd.RangeIndex
+@functools.lru_cache(1)
+def _get_multi_index_type():
+    if cudf is not None:
+        return pd.MultiIndex, cudf.MultiIndex
+    else:
+        return pd.MultiIndex
+def _get_range_index_start(pd_range_index):
+    try:
+        return pd_range_index.start
+    except AttributeError:  # pragma: no cover
+        return pd_range_index._start
+def _get_range_index_stop(pd_range_index):
+    try:
+        return pd_range_index.stop
+    except AttributeError:  # pragma: no cover
+        return pd_range_index._stop
+def _get_range_index_step(pd_range_index):
+    try:
+        return pd_range_index.step
+    except AttributeError:  # pragma: no cover
+        pass
+    try:  # pragma: no cover
+        return pd_range_index._step
+    except AttributeError:  # pragma: no cover
+        return 1  # cudf does not support step arg
+def is_pd_range_empty(pd_range_index):
+    start, stop, step = (
+        _get_range_index_start(pd_range_index),
+        _get_range_index_stop(pd_range_index),
+        _get_range_index_step(pd_range_index),
+    )
+    return (start >= stop and step >= 0) or (start <= stop and step < 0)
+def parse_index(index_value, *args, store_data=False, key=None):
+    """
+    Build an ``IndexValue`` describing ``index_value``.
+    Parameters
+    ----------
+    index_value : pandas index, object exposing ``to_pandas``, or None
+        The index to describe. ``None`` produces a generic ``IndexValue.Index``
+        with no min/max information.
+    args
+        Extra objects mixed into the generated key when the index is empty or
+        ``index_value`` is None.
+    store_data : bool
+        When True, ``index.values`` is kept in the ``_data`` field
+        (never applied to range indexes).
+    key
+        Explicit key to use instead of a generated token.
+    Returns
+    -------
+    IndexValue
+    """
+    from .core import IndexValue
+    def _extract_property(index, tp, ret_data):
+        # collect the keyword arguments used to construct ``tp`` from ``index``
+        kw = {
+            "_min_val": _get_index_min(index),
+            "_max_val": _get_index_max(index),
+            "_min_val_close": True,
+            "_max_val_close": True,
+            "_key": key or _tokenize_index(index, *args),
+        }
+        if ret_data:
+            kw["_data"] = index.values
+        # copy every remaining declared field from the same-named public
+        # attribute of the index, skipping attributes that are missing or None
+        for field in tp._FIELDS:
+            if field in kw or field == "_data":
+                continue
+            val = getattr(index, field.lstrip("_"), None)
+            if val is not None:
+                kw[field] = val
+        return kw
+    def _tokenize_index(index, *token_objects):
+        # a non-empty index is tokenized on its own; an empty one also
+        # includes the extra token objects
+        if not index.empty:
+            return tokenize(index)
+        else:
+            return tokenize(index, *token_objects)
+    def _get_index_min(index):
+        # ValueError/AttributeError are only tolerated for IntervalIndex;
+        # TypeError always yields None
+        try:
+            return index.min()
+        except (ValueError, AttributeError):
+            if isinstance(index, pd.IntervalIndex):
+                return None
+            raise
+        except TypeError:
+            return None
+    def _get_index_max(index):
+        # same error policy as _get_index_min
+        try:
+            return index.max()
+        except (ValueError, AttributeError):
+            if isinstance(index, pd.IntervalIndex):
+                return None
+            raise
+        except TypeError:
+            return None
+    def _serialize_index(index):
+        # pick the IndexValue member class named after the pandas index class
+        tp = getattr(IndexValue, type(index).__name__)
+        properties = _extract_property(index, tp, store_data)
+        properties["_name"] = index.name
+        return tp(**properties)
+    def _serialize_range_index(index):
+        if is_pd_range_empty(index):
+            # empty range: properties are filled in explicitly instead of
+            # being read from the index
+            properties = {
+                "_is_monotonic_increasing": True,
+                "_is_monotonic_decreasing": False,
+                "_is_unique": True,
+                "_min_val": _get_index_min(index),
+                "_max_val": _get_index_max(index),
+                "_min_val_close": True,
+                "_max_val_close": False,
+                "_key": key or _tokenize_index(index, *args),
+                "_name": index.name,
+                "_dtype": index.dtype,
+            }
+        else:
+            properties = _extract_property(index, IndexValue.RangeIndex, False)
+        return IndexValue.RangeIndex(
+            _slice=slice(
+                _get_range_index_start(index),
+                _get_range_index_stop(index),
+                _get_range_index_step(index),
+            ),
+            **properties,
+        )
+    def _serialize_multi_index(index):
+        kw = _extract_property(index, IndexValue.MultiIndex, store_data)
+        kw["_sortorder"] = index.sortorder
+        kw["_dtypes"] = [lev.dtype for lev in index.levels]
+        return IndexValue.MultiIndex(**kw)
+    if index_value is None:
+        # no index available: generic index keyed by ``key`` or the extra args
+        return IndexValue(
+            _index_value=IndexValue.Index(
+                _is_monotonic_increasing=False,
+                _is_monotonic_decreasing=False,
+                _is_unique=False,
+                _min_val=None,
+                _max_val=None,
+                _min_val_close=True,
+                _max_val_close=True,
+                _key=key or tokenize(*args),
+            )
+        )
+    if hasattr(index_value, "to_pandas"):  # pragma: no cover
+        # convert cudf.Index to pandas
+        index_value = index_value.to_pandas()
+    # dispatch on the index kind: range, multi, or anything else
+    if isinstance(index_value, _get_range_index_type()):
+        return IndexValue(_index_value=_serialize_range_index(index_value))
+    elif isinstance(index_value, _get_multi_index_type()):
+        return IndexValue(_index_value=_serialize_multi_index(index_value))
+    else:
+        return IndexValue(_index_value=_serialize_index(index_value))
+def gen_unknown_index_value(index_value, *args):
+    pd_index = index_value.to_pandas()
+    if isinstance(pd_index, pd.RangeIndex):
+        return parse_index(pd.RangeIndex(-1), *args)
+    elif not isinstance(pd_index, pd.MultiIndex):
+        return parse_index(pd.Index([], dtype=pd_index.dtype), *args)
+    else:
+        i = pd.MultiIndex.from_arrays(
+            [c[:0] for c in pd_index.levels], names=pd_index.names
+        )
+        return parse_index(i, *args)
+def split_monotonic_index_min_max(
+    left_min_max, left_increase, right_min_max, right_increase
+):
+    """
+    Split the original two min_max into new min_max. Each min_max should be a list
+    in which each item should be a 4-tuple indicates that this chunk's min value,
+    whether the min value is close, the max value, and whether the max value is close.
+    The return value would be a nested list, each item is a list
+    indicates that how this chunk should be split into.
+    Both inputs must be non-empty: their first items are read unconditionally.
+    NOTE(review): the example output below shows the last item as a list,
+    while the code appends tuples -- the example may be stale; confirm by running it.
+    :param left_min_max: the left min_max
+    :param left_increase: if the original data of left is increased
+    :param right_min_max: the right min_max
+    :param right_increase: if the original data of right is increased
+    :return: nested list in which each item indicates how min_max is split
+    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
+    ...                 (20, True, 22, True)]
+    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
+    ...                  (18, True, 19, True)]
+    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
+    >>> l
+    [[(0, True, 2, False), (2, True, 3, True)], [(3, False, 4, False), (4, True, 6, True), (6, False, 7, False),
+    (7, True, 8, True)], [(8, False, 9, True), (10, True, 12, False), (12, True, 14, True), (14, False, 18, False),
+    (18, True, 18, True)], [(18, False, 19, True), [20, True, 22, True]]]
+    >>> r
+    [[(0, True, 2, False), (2, True, 3, True), (3, False, 4, False), (4, True, 6, True)],
+    [(6, False, 7, False), (7, True, 8, True), (8, False, 9, True)], [(10, True, 12, False), (12, True, 14, True)],
+    [(14, False, 18, False), (18, True, 18, True), (18, False, 19, True), [20, True, 22, True]]]
+    """
+    # one output bucket per input chunk on each side
+    left_idx_to_min_max = [[] for _ in left_min_max]
+    right_idx_to_min_max = [[] for _ in right_min_max]
+    # mutable copies of the ranges currently being consumed
+    left_curr_min_max = list(left_min_max[0])
+    right_curr_min_max = list(right_min_max[0])
+    left_curr_idx = right_curr_idx = 0
+    left_terminate = right_terminate = False
+    while not left_terminate or not right_terminate:
+        if left_terminate:
+            # left is exhausted: the rest of right is emitted unchanged to both sides
+            left_idx_to_min_max[left_curr_idx].append(tuple(right_curr_min_max))
+            right_idx_to_min_max[right_curr_idx].append(tuple(right_curr_min_max))
+            if right_curr_idx + 1 >= len(right_min_max):
+                right_terminate = True
+            else:
+                right_curr_idx += 1
+                right_curr_min_max = list(right_min_max[right_curr_idx])
+        elif right_terminate:
+            # right is exhausted: the rest of left is emitted unchanged to both sides
+            right_idx_to_min_max[right_curr_idx].append(tuple(left_curr_min_max))
+            left_idx_to_min_max[left_curr_idx].append(tuple(left_curr_min_max))
+            if left_curr_idx + 1 >= len(left_min_max):
+                left_terminate = True
+            else:
+                left_curr_idx += 1
+                left_curr_min_max = list(left_min_max[left_curr_idx])
+        elif left_curr_min_max[0] < right_curr_min_max[0]:
+            # left min < right min
+            # the segment ends at left max or just before right min,
+            # whichever [value, closed] pair compares smaller
+            right_min = [right_curr_min_max[0], not right_curr_min_max[1]]
+            max_val = min(left_curr_min_max[2:], right_min)
+            assert len(max_val) == 2
+            min_max = (
+                left_curr_min_max[0],
+                left_curr_min_max[1],
+                max_val[0],
+                max_val[1],
+            )
+            left_idx_to_min_max[left_curr_idx].append(min_max)
+            right_idx_to_min_max[right_curr_idx].append(min_max)
+            if left_curr_min_max[2:] == max_val:
+                # left max < right min
+                if left_curr_idx + 1 >= len(left_min_max):
+                    left_terminate = True
+                else:
+                    left_curr_idx += 1
+                    left_curr_min_max = list(left_min_max[left_curr_idx])
+            else:
+                # from left min(left min close) to right min(exclude right min close)
+                left_curr_min_max[:2] = right_curr_min_max[:2]
+        elif left_curr_min_max[0] > right_curr_min_max[0]:
+            # left min > right min
+            # mirror of the branch above with the two sides swapped
+            left_min = [left_curr_min_max[0], not left_curr_min_max[1]]
+            max_val = min(right_curr_min_max[2:], left_min)
+            min_max = (
+                right_curr_min_max[0],
+                right_curr_min_max[1],
+                max_val[0],
+                max_val[1],
+            )
+            left_idx_to_min_max[left_curr_idx].append(min_max)
+            right_idx_to_min_max[right_curr_idx].append(min_max)
+            if right_curr_min_max[2:] == max_val:
+                # right max < left min
+                if right_curr_idx + 1 >= len(right_min_max):
+                    right_terminate = True
+                else:
+                    right_curr_idx += 1
+                    right_curr_min_max = list(right_min_max[right_curr_idx])
+            else:
+                # from right min(right min close) to left min(exclude left min close)
+                right_curr_min_max[:2] = left_curr_min_max[:2]
+        else:
+            # left min == right min
+            # the shared segment ends at the smaller of the two max pairs
+            max_val = min(left_curr_min_max[2:], right_curr_min_max[2:])
+            assert len(max_val) == 2
+            min_max = (
+                left_curr_min_max[0],
+                left_curr_min_max[1],
+                max_val[0],
+                max_val[1],
+            )
+            left_idx_to_min_max[left_curr_idx].append(min_max)
+            right_idx_to_min_max[right_curr_idx].append(min_max)
+            if max_val == left_curr_min_max[2:]:
+                # left range fully consumed: move to the next left chunk
+                if left_curr_idx + 1 >= len(left_min_max):
+                    left_terminate = True
+                else:
+                    left_curr_idx += 1
+                    left_curr_min_max = list(left_min_max[left_curr_idx])
+            else:
+                # remaining left range starts right after the emitted segment
+                left_curr_min_max[:2] = max_val[0], not max_val[1]
+            if max_val == right_curr_min_max[2:]:
+                # right range fully consumed: move to the next right chunk
+                if right_curr_idx + 1 >= len(right_min_max):
+                    right_terminate = True
+                else:
+                    right_curr_idx += 1
+                    right_curr_min_max = list(right_min_max[right_curr_idx])
+            else:
+                # remaining right range starts right after the emitted segment
+                right_curr_min_max[:2] = max_val[0], not max_val[1]
+    # a side whose increase flag is exactly False gets its chunk order reversed
+    if left_increase is False:
+        left_idx_to_min_max = list(reversed(left_idx_to_min_max))
+    if right_increase is False:
+        right_idx_to_min_max = list(reversed(right_idx_to_min_max))
+    return left_idx_to_min_max, right_idx_to_min_max
+def build_split_idx_to_origin_idx(splits, increase=True):
+    # splits' len is equal to the original chunk size on a specified axis,
+    # splits is sth like [[(0, True, 2, True), (2, False, 3, True)]]
+    # which means there is one input chunk, and will be split into 2 out chunks
+    # in this function, we want to build a new dict from the out chunk index to
+    # the original chunk index and the inner position, like {0: (0, 0), 1: (0, 1)}
+    if increase is False:
+        splits = list(reversed(splits))
+    out_idx = itertools.count(0)
+    res = dict()
+    for origin_idx, _ in enumerate(splits):
+        for pos in range(len(splits[origin_idx])):
+            if increase is False:
+                o_idx = len(splits) - origin_idx - 1
+            else:
+                o_idx = origin_idx
+            res[next(out_idx)] = o_idx, pos
+    return res
+def _generate_value(dtype, fill_value):
+    # special handle for datetime64 and timedelta64
+    dispatch = {
+        np.datetime64: pd.Timestamp,
+        np.timedelta64: pd.Timedelta,
+        pd.CategoricalDtype.type: lambda x: pd.CategoricalDtype([x]),
+        # for object, we do not know the actual dtype,
+        # just convert to str for common usage
+        np.object_: lambda x: str(fill_value),
+    }
+    # otherwise, just use dtype.type itself to convert
+    convert = dispatch.get(dtype.type, dtype.type)
+    return convert(fill_value)
+def build_empty_df(dtypes, index=None):
+    columns = dtypes.index
+    length = len(index) if index is not None else 0
+    record = [[_generate_value(dtype, 1) for dtype in dtypes]] * max(1, length)
+    # duplicate column may exist,
+    # so use RangeIndex first
+    df = pd.DataFrame(record, columns=range(len(dtypes)), index=index)
+    for i, dtype in enumerate(dtypes):
+        s = df.iloc[:, i]
+        if not pd.api.types.is_dtype_equal(s.dtype, dtype):
+            df.iloc[:, i] = s.astype(dtype)
+    df.columns = columns
+    return df[:length] if len(df) > length else df
+def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
+    """
+    Build a small pandas DataFrame mimicking the dtypes and index kind of ``df_obj``.
+    Parameters
+    ----------
+    df_obj
+        Object providing ``dtypes`` (or ``dtype`` and ``name`` when it is a
+        ``SERIES_TYPE``) and ``index_value``.
+    fill_value : scalar, list or tuple
+        Raw value(s) used to generate cell and index values; a list/tuple is
+        paired element-wise with ``size``.
+    size : int, list or tuple
+        Number of rows generated for each fill value.
+    ensure_string : bool
+        When True, the string "O" is prepended to every object-dtype column.
+    Returns
+    -------
+    pd.DataFrame
+    """
+    dfs = []
+    # normalize ``size`` and ``fill_value`` to parallel sequences
+    if not isinstance(size, (list, tuple)):
+        sizes = [size]
+    else:
+        sizes = size
+    if not isinstance(fill_value, (list, tuple)):
+        fill_values = [fill_value]
+    else:
+        fill_values = fill_value
+    from .core import SERIES_TYPE
+    # a series is treated as a one-column frame named after the series
+    dtypes = (
+        pd.Series([df_obj.dtype], index=[df_obj.name])
+        if isinstance(df_obj, SERIES_TYPE)
+        else df_obj.dtypes
+    )
+    # NOTE: the loop variables rebind the ``size`` and ``fill_value`` parameters
+    for size, fill_value in zip(sizes, fill_values):
+        record = [[_generate_value(dtype, fill_value) for dtype in dtypes]] * size
+        df = pd.DataFrame(record)
+        df.columns = dtypes.index
+        if len(record) != 0:  # columns is empty in some cases
+            # give the rows an index of the same kind as the target index
+            target_index = df_obj.index_value.to_pandas()
+            if isinstance(target_index, pd.MultiIndex):
+                index_val = tuple(
+                    _generate_value(level.dtype, fill_value)
+                    for level in target_index.levels
+                )
+                df.index = pd.MultiIndex.from_tuples(
+                    [index_val] * size, names=target_index.names
+                )
+            else:
+                index_val = _generate_value(target_index.dtype, fill_value)
+                df.index = pd.Index([index_val] * size, name=target_index.name)
+        # make sure dtypes correct
+        for i, dtype in enumerate(dtypes):
+            s = df.iloc[:, i]
+            if not pd.api.types.is_dtype_equal(s.dtype, dtype):
+                df[df.columns[i]] = s.astype(dtype)
+        dfs.append(df)
+    if len(dfs) == 1:
+        ret_df = dfs[0]
+    else:
+        ret_df = pd.concat(dfs)
+    if ensure_string:
+        # prefix object columns with "O" so their values are strings
+        obj_dtypes = dtypes[dtypes == np.dtype("O")]
+        ret_df[obj_dtypes.index] = ret_df[obj_dtypes.index].radd("O")
+    return ret_df
+def build_empty_series(dtype, index=None, name=None):
+    length = len(index) if index is not None else 0
+    return pd.Series(
+        [_generate_value(dtype, 1) for _ in range(length)],
+        dtype=dtype,
+        index=index,
+        name=name,
+    )
+def build_series(
+    series_obj=None,
+    fill_value=1,
+    size=1,
+    name=None,
+    ensure_string=False,
+    dtype=None,
+    index=None,
+):
+    """
+    Build a small pandas Series mimicking a dtype and an index kind.
+    Parameters
+    ----------
+    series_obj : optional
+        Object providing ``dtype`` and either ``index_value`` or ``index``;
+        when given, it overrides the ``dtype`` and ``index`` arguments.
+    fill_value : scalar, list or tuple
+        Raw value(s) used to generate element and index values; a list/tuple
+        is paired element-wise with ``size``.
+    size : int, list or tuple
+        Number of repetitions for each fill value.
+    name : optional
+        Name of the result.
+    ensure_string : bool
+        When True and ``dtype`` is object, the string "O" is prepended to
+        every element.
+    dtype : optional
+        Dtype used when ``series_obj`` is None.
+    index : optional
+        Index template used when ``series_obj`` is None.
+    Returns
+    -------
+    pd.Series
+    """
+    seriess = []
+    # normalize ``size`` and ``fill_value`` to parallel sequences
+    if not isinstance(size, (list, tuple)):
+        sizes = [size]
+    else:
+        sizes = size
+    if not isinstance(fill_value, (list, tuple)):
+        fill_values = [fill_value]
+    else:
+        fill_values = fill_value
+    # only an empty slice of the index is kept: it carries the index kind
+    if series_obj is not None:
+        dtype = series_obj.dtype
+        try:
+            series_index = series_obj.index_value.to_pandas()[:0]
+        except AttributeError:
+            series_index = series_obj.index[:0]
+    else:
+        series_index = index[:0] if index is not None else None
+    # NOTE: the loop rebinds ``size``, ``fill_value`` and ``index``
+    for size, fill_value in zip(sizes, fill_values):
+        empty_series = build_empty_series(dtype, name=name, index=series_index)
+        record = _generate_value(dtype, fill_value)
+        if isinstance(empty_series.index, pd.MultiIndex):
+            # one generated label per level, then a single-row reindex
+            index = tuple(
+                _generate_value(level.dtype, fill_value)
+                for level in empty_series.index.levels
+            )
+            empty_series = empty_series.reindex(
+                index=pd.MultiIndex.from_tuples([index], names=empty_series.index.names)
+            )
+            empty_series.iloc[0] = record
+        else:
+            # categorical indexes get a None label instead of a generated one
+            if isinstance(empty_series.index.dtype, pd.CategoricalDtype):
+                index = None
+            else:
+                index = _generate_value(empty_series.index.dtype, fill_value)
+            empty_series.loc[index] = record
+        empty_series = pd.concat([empty_series] * size)
+        # make sure dtype correct for MultiIndex
+        empty_series = empty_series.astype(dtype, copy=False)
+        seriess.append(empty_series)
+    if len(seriess) == 1:
+        ret_series = seriess[0]
+    else:
+        ret_series = pd.concat(seriess)
+    if ensure_string and dtype == np.dtype("O"):
+        ret_series = ret_series.radd("O")
+    return ret_series
+def infer_index_value(left_index_value, right_index_value):
+    from .core import IndexValue
+    if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
+        right_index_value.value, IndexValue.RangeIndex
+    ):
+        if left_index_value.value.slice == right_index_value.value.slice:
+            return left_index_value
+        return parse_index(
+            pd.Index([], dtype=np.int64), left_index_value, right_index_value
+        )
+    # when left index and right index is identical, and both of them are elements unique,
+    # we can infer that the out index should be identical also
+    if (
+        left_index_value.is_unique
+        and right_index_value.is_unique
+        and left_index_value.key == right_index_value.key
+    ):
+        return left_index_value
+    left_index = left_index_value.to_pandas()
+    right_index = right_index_value.to_pandas()
+    out_index = pd.Index(
+        [], dtype=find_common_type([left_index.dtype, right_index.dtype])
+    )
+    return parse_index(out_index, left_index_value, right_index_value)
+def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
+    """
+    Compute the index value obtained by applying ``indexes`` to ``index_value``.
+    Parameters
+    ----------
+    index_value
+        Source index value.
+    indexes : slice, integer, tuple, Entity or other indexer
+        Selection applied to the index.
+    store_data : bool
+        Forwarded to ``parse_index`` (forced to False for ``Entity`` indexers).
+    rechunk : bool
+        When True, a full slice no longer short-circuits to the input.
+    Returns
+    -------
+    The input ``index_value`` for a full slice without rechunk, otherwise a
+    newly parsed index value.
+    """
+    pd_index = index_value.to_pandas()
+    # when rechunk is True, the output index shall be treated
+    # different from the input one
+    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
+        return index_value
+    elif not index_value.has_value():
+        # no stored values: re-parse with the indexer as an extra token
+        # object and carry over the known bounds
+        new_index_value = parse_index(pd_index, indexes, store_data=store_data)
+        new_index_value._index_value._min_val = index_value.min_val
+        new_index_value._index_value._min_val_close = index_value.min_val_close
+        new_index_value._index_value._max_val = index_value.max_val
+        new_index_value._index_value._max_val_close = index_value.max_val_close
+        return new_index_value
+    else:
+        if isinstance(indexes, Integral):
+            # wrap in a list so the result is still an index, not a scalar
+            return parse_index(pd_index[[indexes]], store_data=store_data)
+        elif isinstance(indexes, Entity):
+            # the indexer is an Entity, not concrete values: parse an empty
+            # index of the same kind instead of indexing
+            if isinstance(pd_index, pd.RangeIndex):
+                return parse_index(
+                    pd.RangeIndex(-1), indexes, index_value, store_data=False
+                )
+            else:
+                return parse_index(
+                    type(pd_index)([]), indexes, index_value, store_data=False
+                )
+        # reached only when ``indexes`` is neither Integral nor Entity
+        if isinstance(indexes, tuple):
+            return parse_index(pd_index[list(indexes)], store_data=store_data)
+        else:
+            return parse_index(pd_index[indexes], store_data=store_data)
+def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
+    """
+    Merge index value according to their chunk index.
+    Parameters
+    ----------
+    to_merge_index_values : dict
+        index to index_value
+    store_data : bool
+        store data in index_value
+    Returns
+    -------
+    merged_index_value
+    """
+    pd_index = None
+    min_val, min_val_close, max_val, max_val_close = None, None, None, None
+    for _, chunk_index_value in sorted(to_merge_index_values.items()):
+        if pd_index is None:
+            pd_index = chunk_index_value.to_pandas()
+            min_val, min_val_close, max_val, max_val_close = (
+                chunk_index_value.min_val,
+                chunk_index_value.min_val_close,
+                chunk_index_value.max_val,
+                chunk_index_value.max_val_close,
+            )
+        else:
+            cur_pd_index = chunk_index_value.to_pandas()
+            if store_data or (
+                isinstance(pd_index, pd.RangeIndex)
+                and isinstance(cur_pd_index, pd.RangeIndex)
+                and cur_pd_index.step == pd_index.step
+                and cur_pd_index.start == pd_index.stop
+            ):
+                # range index that is continuous
+                pd_index = pd_index.append(cur_pd_index)
+            else:
+                pd_index = pd.Index([], dtype=pd_index.dtype)
+            if chunk_index_value.min_val is not None:
+                try:
+                    if min_val is None or min_val > chunk_index_value.min_val:
+                        min_val = chunk_index_value.min_val
+                        min_val_close = chunk_index_value.min_val_close
+                except TypeError:
+                    # min_value has different types that cannot compare
+                    # just stop compare
+                    continue
+            if chunk_index_value.max_val is not None:
+                if max_val is None or max_val < chunk_index_value.max_val:
+                    max_val = chunk_index_value.max_val
+                    max_val_close = chunk_index_value.max_val_close
+    index_value = parse_index(pd_index, store_data=store_data)
+    if not index_value.has_value():
+        index_value._index_value._min_val = min_val
+        index_value._index_value._min_val_close = min_val_close
+        index_value._index_value._max_val = max_val
+        index_value._index_value._max_val_close = max_val_close
+    return index_value
+def infer_dtypes(left_dtypes, right_dtypes, operator):
+    left = build_empty_df(left_dtypes)
+    right = build_empty_df(right_dtypes)
+    return operator(left, right).dtypes
+@functools.lru_cache(100)
+def infer_dtype(left_dtype, right_dtype, operator):
+    left = build_empty_series(left_dtype)
+    right = build_empty_series(right_dtype)
+    return operator(left, right).dtype
+def filter_dtypes(dtypes, column_min_max):
+    left_filter = operator.ge if column_min_max[1] else operator.gt
+    left = left_filter(dtypes.index, column_min_max[0])
+    right_filter = operator.le if column_min_max[3] else operator.lt
+    right = right_filter(dtypes.index, column_min_max[2])
+    return dtypes[left & right]
+def in_range_index(i, pd_range_index):
+    """
+    Check whether the input `i` is within `pd_range_index` which is a pd.RangeIndex.
+    """
+    start, stop, step = (
+        _get_range_index_start(pd_range_index),
+        _get_range_index_stop(pd_range_index),
+        _get_range_index_step(pd_range_index),
+    )
+    if step > 0 and start <= i < stop and (i - start) % step == 0:
+        return True
+    if step < 0 and start >= i > stop and (start - i) % step == 0:
+        return True
+    return False
+def wrap_notimplemented_exception(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except NotImplementedError:
+            return NotImplemented
+    return wrapper
+def validate_axis(axis, tileable=None):
+    if axis == "index":
+        axis = 0
+    elif axis == "columns":
+        axis = 1
+    illegal = False
+    try:
+        axis = operator.index(axis)
+        if axis < 0 or (tileable is not None and axis >= tileable.ndim):
+            illegal = True
+    except TypeError:
+        illegal = True
+    if illegal:
+        raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
+    return axis
+def validate_axis_style_args(
+    data, args, kwargs, arg_name, method_name
+):  # pragma: no cover
+    """Argument handler for mixed index, columns / axis functions
+    In an attempt to handle both `.method(index, columns)`, and
+    `.method(arg, axis=.)`, we have to do some bad things to argument
+    parsing. This translates all arguments to `{index=., columns=.}` style.
+    Parameters
+    ----------
+    data : DataFrame
+    args : tuple
+        All positional arguments from the user
+    kwargs : dict
+        All keyword arguments from the user
+    arg_name, method_name : str
+        Used for better error messages
+    Returns
+    -------
+    kwargs : dict
+        A dictionary of keyword arguments. Doesn't modify ``kwargs``
+        inplace, so update them with the return value here.
+    Raises
+    ------
+    TypeError
+        When 'axis' is combined with 'index'/'columns', when ``arg_name`` is
+        given both positionally and by keyword, or when two or more
+        positional arguments are passed.
+    """
+    out = {}
+    # Goal: fill 'out' with index/columns-style arguments
+    # like out = {'index': foo, 'columns': bar}
+    # Start by validating for consistency
+    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]
+    if "axis" in kwargs and any(x in kwargs for x in axes_names):
+        msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
+        raise TypeError(msg)
+    # First fill with explicit values provided by the user...
+    if arg_name in kwargs:
+        if args:
+            msg = f"{method_name} got multiple values for argument '{arg_name}'"
+            raise TypeError(msg)
+        # the keyword value applies to the axis named by 'axis' (default 0)
+        axis = axes_names[validate_axis(kwargs.get("axis", 0), data)]
+        out[axis] = kwargs[arg_name]
+    # More user-provided arguments, now from kwargs
+    for k, v in kwargs.items():
+        # only keys that validate as an axis name are kept; others are ignored
+        try:
+            ax = axes_names[validate_axis(k, data)]
+        except ValueError:
+            pass
+        else:
+            out[ax] = v
+    # All user-provided kwargs have been handled now.
+    # Now we supplement with positional arguments, raising
+    # when there's ambiguity or conflicts
+    if len(args) == 0:
+        pass  # It's up to the function to decide if this is valid
+    elif len(args) == 1:
+        axis = axes_names[validate_axis(kwargs.get("axis", 0), data)]
+        out[axis] = args[0]
+    elif len(args) == 2:
+        if "axis" in kwargs:
+            # Unambiguously wrong
+            msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
+            raise TypeError(msg)
+        # two positional arguments are rejected as ambiguous
+        msg = (
+            "Interpreting call\n\t'.{method_name}(a, b)' as "
+            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
+            "arguments to remove any ambiguity."
+        )
+        raise TypeError(msg.format(method_name=method_name))
+    else:
+        msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
+        raise TypeError(msg)
+    return out
+def validate_output_types(**kwargs):
+    """
+    Normalize the output type specification found in keyword arguments.
+    A single type may be given as ``object_type`` or ``output_type``, a
+    collection of types as ``output_types``. String entries are resolved to
+    the ``OutputType`` attribute with the same lower-cased name.
+    :param kwargs: keyword arguments possibly carrying output type settings
+    :return: list of output types, or None when none were specified
+    """
+    from ..core import OutputType
+    single_type = kwargs.pop("object_type", None) or kwargs.pop("output_type", None)
+    specified = kwargs.pop("output_types", None)
+    if not specified and single_type is not None:
+        # fall back to the single type when no collection was given
+        specified = [single_type]
+    if not specified:
+        return None
+    resolved = []
+    for item in specified:
+        if isinstance(item, str):
+            item = getattr(OutputType, item.lower())
+        resolved.append(item)
+    return resolved
+def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
+    """
+    Fetch corner DataFrame or Series for repr usage.
+    When pandas would truncate the repr (more rows than ``display.max_rows``),
+    only the leading and trailing rows needed for the truncated display are
+    fetched and concatenated; otherwise the whole object is fetched.
+    :param df_or_series: DataFrame or Series
+    :param session: session forwarded to the fetch calls
+    :return: corner DataFrame
+    """
+    from .indexing.iloc import iloc
+    max_rows = pd.get_option("display.max_rows")
+    try:
+        min_rows = pd.get_option("display.min_rows")
+    except KeyError:  # pragma: no cover
+        # display.min_rows is introduced in pandas 0.25
+        min_rows = None
+    if min_rows is None:
+        # pandas treats a None min_rows as "follow the value of max_rows"
+        min_rows = max_rows
+    elif max_rows is not None:
+        min_rows = min(min_rows, max_rows)
+    n_rows = df_or_series.shape[0]
+    # A None max_rows means pandas never truncates, so all rows are needed;
+    # comparing against None would also raise TypeError.
+    if max_rows is None or not (
+        n_rows > max_rows and n_rows > min_rows // 2 * 2 + 2
+    ):
+        return df_or_series._fetch(session=session)
+    # for pandas, greater than max_rows
+    # will display min_rows
+    # thus we fetch min_rows + 2 lines
+    index_size = min_rows // 2 + 1
+    head = iloc(df_or_series)[:index_size]
+    tail = iloc(df_or_series)[-index_size:]
+    head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
+    xdf = cudf if head.op.is_gpu() else pd
+    return xdf.concat([head_data, tail_data], axis="index")
+class ReprSeries(pd.Series):
+    """
+    Series built from corner data only that reports the length of the full data.
+    """
+    def __init__(self, corner_data, real_shape):
+        super().__init__(corner_data)
+        # shape of the complete data the corner rows were taken from
+        self._real_shape = real_shape
+    def __len__(self):
+        # Only corner rows are stored for repr, so the natural length would
+        # be wrong; report the first dimension of the real shape instead.
+        full_shape = self._real_shape
+        return full_shape[0]
+def filter_dtypes_by_index(dtypes, index):
+    """
+    Select the entries of ``dtypes`` whose labels appear in ``index``.
+    A direct ``.loc`` lookup (with missing values dropped) is tried first.
+    When that raises ``KeyError``, the labels shared by both indexes are
+    found through a merge and only those entries are returned.
+    :param dtypes: pandas Series of dtypes
+    :param index: index holding the labels to keep
+    :return: the filtered dtypes
+    """
+    try:
+        return dtypes.loc[index].dropna()
+    except KeyError:
+        # to_frame() names the level columns 0..nlevels-1 for unnamed levels
+        level_columns = list(range(dtypes.index.nlevels))
+        shared = dtypes.index.to_frame().merge(index.to_frame())
+        matched = dtypes.loc[shared.set_index(level_columns).index]
+        # restore the level names lost by the round trip through a frame
+        matched.index.names = dtypes.index.names
+        return matched
+@contextmanager
+def create_sa_connection(con, **kwargs):
+    """
+    Context manager yielding a SQLAlchemy connection for ``con``.
+    ``con`` may be an existing ``Connection`` (yielded as is and left open),
+    an ``Engine`` (a new connection is opened and closed on exit), or anything
+    accepted by ``sqlalchemy.create_engine`` (an engine is created from it with
+    ``kwargs``; on exit the connection is closed and the engine disposed).
+    :param con: connection, engine, or argument for ``create_engine``
+    :param kwargs: extra arguments for ``create_engine``
+    """
+    import sqlalchemy as sa
+    from sqlalchemy.engine import Connection, Engine
+    owned_engine = None
+    # a connection handed in by the user is never closed here
+    owns_connection = not isinstance(con, Connection)
+    if owns_connection:
+        if isinstance(con, Engine):
+            con = con.connect()
+        else:
+            owned_engine = sa.create_engine(con, **kwargs)
+            con = owned_engine.connect()
+    try:
+        yield con
+    finally:
+        if owns_connection:
+            con.close()
+        if owned_engine is not None:
+            owned_engine.dispose()
+def to_arrow_dtypes(dtypes, test_df=None):
+    """
+    Return a copy of ``dtypes`` with string dtypes replaced by ``ArrowStringDtype``.
+    When ``test_df`` is given, the column at the same position is inspected:
+    the dtype is replaced only if the column has no non-NA value or its first
+    non-NA value is a ``str``. Without ``test_df`` every string dtype is replaced.
+    :param dtypes: Series of dtypes
+    :param test_df: optional DataFrame used to check actual values
+    :return: new Series of dtypes
+    """
+    from .arrays import ArrowStringDtype
+    new_dtypes = dtypes.copy()
+    for pos, dtype in enumerate(dtypes):
+        if not is_string_dtype(dtype):
+            continue
+        if test_df is not None:
+            column = test_df.iloc[:, pos]
+            present = column[column.notna()]
+            # keep the original dtype when the sampled value is not a str
+            if len(present) > 0 and not isinstance(present.iloc[0], str):
+                continue
+        new_dtypes.iloc[pos] = ArrowStringDtype()
+    return new_dtypes
+def make_dtype(dtype):
+    """
+    Convert ``dtype`` to a dtype object.
+    :param dtype: None, a numpy dtype, an ExtensionDtype, or anything
+        accepted by ``np.dtype``
+    :return: None for None, the input itself when it is already a numpy or
+        extension dtype, otherwise ``np.dtype(dtype)``
+    """
+    if dtype is None:
+        return None
+    if isinstance(dtype, (np.dtype, ExtensionDtype)):
+        return dtype
+    return np.dtype(dtype)
+def make_dtypes(dtypes):
+    """
+    Convert a collection of dtype specifications to a Series of dtypes.
+    :param dtypes: None, a pandas Series, or data accepted by ``pd.Series``
+    :return: None for None, otherwise a Series with ``make_dtype`` applied
+        to every element
+    """
+    if dtypes is None:
+        return None
+    as_series = dtypes if isinstance(dtypes, pd.Series) else pd.Series(dtypes)
+    return as_series.apply(make_dtype)
+def is_dataframe(x):
+    """
+    Tell whether ``x`` is a pandas DataFrame, or a cudf DataFrame when the
+    module-level ``cudf`` is not None.
+    """
+    if cudf is None:
+        frame_types = (pd.DataFrame,)
+    else:  # pragma: no cover
+        frame_types = (cudf.DataFrame, pd.DataFrame)
+    return isinstance(x, frame_types)
+def is_series(x):
+    """
+    Tell whether ``x`` is a pandas Series, or a cudf Series when the
+    module-level ``cudf`` is not None.
+    """
+    if cudf is None:
+        series_types = (pd.Series,)
+    else:  # pragma: no cover
+        series_types = (cudf.Series, pd.Series)
+    return isinstance(x, series_types)
+def is_index(x):
+    """
+    Tell whether ``x`` is a pandas Index, or a cudf Index when the
+    module-level ``cudf`` is not None.
+    """
+    if cudf is None:
+        index_types = (pd.Index,)
+    else:  # pragma: no cover
+        index_types = (cudf.Index, pd.Index)
+    return isinstance(x, index_types)
+def get_xdf(x):
+    """
+    Return the dataframe module matching ``x``: ``cudf`` when ``cudf`` is not
+    None and ``x`` is a cudf DataFrame, Series or Index, otherwise ``pd``.
+    """
+    if cudf is None:
+        return pd
+    # pragma: no cover
+    cudf_types = (cudf.DataFrame, cudf.Series, cudf.Index)
+    return cudf if isinstance(x, cudf_types) else pd
+def is_cudf(x):
+    """
+    Tell whether ``x`` is a cudf DataFrame, Series or Index.
+    Always False when the module-level ``cudf`` is None.
+    """
+    if cudf is None:
+        return False
+    # pragma: no cover
+    return isinstance(x, (cudf.DataFrame, cudf.Series, cudf.Index))
+def whether_to_clean_up(op, threshold):
+    """
+    Decide whether ``op.func`` holds enough data to need cleanup.
+    The size of the objects reachable from ``op.func`` (its closure cells, or
+    its ``__dict__`` / ``__slots__`` attributes when it has no closure cells)
+    is estimated with ``sys.getsizeof``. As soon as the running total reaches
+    ``threshold``, counting stops and ``op.need_clean_up_func`` is set to True.
+    :param op: object exposing ``func`` and ``need_clean_up_func``
+    :param threshold: number of counted bytes at which cleanup is required
+    :return: the resulting value of ``op.need_clean_up_func``
+    """
+    func = op.func
+    counted_bytes = 0
+    # objects nested deeper than this contribute 0 bytes to the estimate
+    max_recursion_depth = 2
+    from collections import deque
+    from numbers import Number
+    # instances of these types are measured shallowly and never traversed
+    BYPASS_CLASSES = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)
+    # raised to abort counting once the threshold has been reached
+    class GetSizeEarlyStopException(Exception):
+        pass
+    def check_exceed_threshold():
+        nonlocal threshold, counted_bytes
+        if counted_bytes >= threshold:
+            raise GetSizeEarlyStopException()
+    def getsize(obj_outer):
+        # every top-level call starts with its own set of visited object ids
+        _seen_obj_ids = set()
+        def inner_count(obj, recursion_depth):
+            obj_id = id(obj)
+            # already counted objects and too deeply nested ones add nothing
+            if obj_id in _seen_obj_ids or recursion_depth > max_recursion_depth:
+                return 0
+            _seen_obj_ids.add(obj_id)
+            recursion_depth += 1
+            size = sys.getsizeof(obj)
+            if isinstance(obj, BYPASS_CLASSES):
+                return size
+            elif isinstance(obj, (tuple, list, set, deque)):
+                size += sum(inner_count(i, recursion_depth) for i in obj)
+            elif hasattr(obj, "items"):
+                # mapping-like objects: count both keys and values
+                size += sum(
+                    inner_count(k, recursion_depth) + inner_count(v, recursion_depth)
+                    for k, v in getattr(obj, "items")()
+                )
+            # instance attributes, stored in __dict__ and/or __slots__
+            if hasattr(obj, "__dict__"):
+                size += inner_count(vars(obj), recursion_depth)
+            if hasattr(obj, "__slots__"):
+                size += sum(
+                    inner_count(getattr(obj, s), recursion_depth)
+                    for s in obj.__slots__
+                    if hasattr(obj, s)
+                )
+            return size
+        return inner_count(obj_outer, 0)
+    try:
+        # Note: In most cases, func is just a function with closure, while chances are that
+        # func is a callable that doesn't have __closure__ attribute.
+        if inspect.isclass(func):
+            # classes are not measured at all
+            pass
+        elif hasattr(func, "__closure__") and func.__closure__ is not None:
+            for cell in func.__closure__:
+                counted_bytes += getsize(cell.cell_contents)
+                check_exceed_threshold()
+        elif callable(func):
+            if hasattr(func, "__dict__"):
+                for k, v in func.__dict__.items():
+                    counted_bytes += sum([getsize(k), getsize(v)])
+                    check_exceed_threshold()
+            if hasattr(func, "__slots__"):
+                for slot in func.__slots__:
+                    counted_bytes += (
+                        getsize(getattr(func, slot)) if hasattr(func, slot) else 0
+                    )
+                    check_exceed_threshold()
+    except GetSizeEarlyStopException:
+        logger.debug("Func needs cleanup.")
+        op.need_clean_up_func = True
+    else:
+        # the flag is expected to still be False when the threshold was not hit
+        assert op.need_clean_up_func is False
+        logger.debug("Func doesn't need cleanup.")
+    return op.need_clean_up_func
+def concat_on_columns(objs: List) -> Any:
+    """
+    Concatenate ``objs`` along the column axis with the module matching the
+    first object (as returned by ``get_xdf``).
+    :param objs: objects to concatenate; the first one selects the module
+    :return: the concatenated result
+    """
+    first = objs[0]
+    xdf = get_xdf(first)
+    combined = xdf.concat(objs, axis=1)
+    if xdf is cudf:
+        # In cudf, concat with axis=1 and ignore_index=False resets the index,
+        # which differs from pandas and from cudf's own documentation, so the
+        # index of the first object is put back explicitly.
+        combined.index = first.index
+    return combined
+def apply_if_callable(maybe_callable, obj, **kwargs):
+    """
+    Call ``maybe_callable(obj, **kwargs)`` when it is callable, otherwise
+    return ``maybe_callable`` unchanged.
+    """
+    return maybe_callable(obj, **kwargs) if callable(maybe_callable) else maybe_callable
+def patch_sa_engine_execute():
+    """
+    Add an ``execute`` method to SQLAlchemy's ``Engine`` when it has none.
+    pandas did not resolve the compatibility issue with sqlalchemy 2.0, see
+    https://github.com/pandas-dev/pandas/issues/40686. The patched method
+    opens a connection from the engine and executes the statement on it.
+    Nothing is done when SQLAlchemy cannot be imported or ``Engine`` already
+    has an ``execute`` attribute.
+    """
+    try:
+        from sqlalchemy.engine import Engine
+    except ImportError:  # pragma: no cover
+        return
+    if not hasattr(Engine, "execute"):
+        def execute(self, statement, *multiparams, **params):
+            return self.connect().execute(statement, *multiparams, **params)
+        Engine.execute = execute
+def pack_func_args(df, funcs, *args, **kwargs) -> Any:
+    """
+    Bind ``args`` and ``kwargs`` into ``funcs`` so they cannot be confused with
+    other positional and keyword arguments. The rules, applied in order, are:
+    1. Without any args and kwargs, funcs is returned unchanged.
+    2. A dict-like funcs yields a new dict with the same keys whose values are
+    packed recursively.
+    3. A list-like funcs yields a new list whose elements are packed recursively.
+    4. A str funcs is resolved with ``get_callable_by_name(df, funcs)`` and the
+    resolved callable is wrapped in a partial carrying args and kwargs.
+    5. Anything else is treated as a callable and wrapped in a partial carrying
+    args and kwargs.
+    Parameters
+    ----------
+    df : pandas.DataFrame or pandas.Series
+        The object used to resolve function names.
+    funcs : function, str, list-like or dict-like
+        Function(s) to pack, in the forms accepted by DataFrame.transform().
+    *args :
+        The positional arguments to bind. If funcs contains many functions, each
+        one should be able to accept *args.
+    **kwargs :
+        The keyword arguments to bind. If funcs contains many functions, each
+        one should be able to accept **kwargs.
+    Returns
+    -------
+    The packed functions having the same structure as funcs.
+    Raises
+    ------
+    ValueError :
+        If a string names an attribute of df that is not callable.
+    AttributeError :
+        If a string names no corresponding function.
+    """
+    if not (args or kwargs):
+        return funcs
+    if is_dict_like(funcs):
+        packed = {}
+        for key, value in funcs.items():
+            packed[key] = pack_func_args(df, value, *args, **kwargs)
+        return packed
+    if is_list_like(funcs):
+        return [pack_func_args(df, item, *args, **kwargs) for item in funcs]
+    target = funcs
+    if isinstance(funcs, str):
+        target = get_callable_by_name(df, funcs)
+    # Callable
+    return functools.partial(target, *args, **kwargs)
+def get_callable_by_name(df: Any, func_name: str) -> Callable:
+    """
+    Resolve a function name to a callable.
+    The attribute ``func_name`` of df is looked up first: it is returned when
+    callable, and a ValueError is raised when it exists but is not callable.
+    When df has no such attribute, the numpy function of that name is returned,
+    provided numpy has it and df defines ``__array__``. Otherwise an
+    AttributeError is raised. This rule is almost the same as in pandas.
+    Parameters
+    ----------
+    df : pandas.Series or pandas.DataFrame
+        The receiver of the func name.
+    func_name : str
+        The func name.
+    Returns
+    -------
+    The callable instance.
+    Raises
+    ------
+    ValueError :
+        If the attribute found on df is not callable.
+    AttributeError :
+        If no corresponding function is found.
+    """
+    if not hasattr(df, func_name):
+        # fall back to numpy only for objects that can act as arrays
+        if hasattr(np, func_name) and hasattr(df, "__array__"):
+            return getattr(np, func_name)
+        raise AttributeError(
+            f"'{func_name}' is not a valid function for '{type(df).__name__}' object"
+        )
+    candidate = getattr(df, func_name)
+    if not callable(candidate):
+        raise ValueError(f"{func_name} is not a callable")
+    return candidate