maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Callable, MutableMapping, Union
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import OutputType
|
|
22
|
+
from ...serialization.serializables import (
|
|
23
|
+
BoolField,
|
|
24
|
+
DictField,
|
|
25
|
+
FunctionField,
|
|
26
|
+
TupleField,
|
|
27
|
+
)
|
|
28
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
29
|
+
from ...utils import make_dtypes
|
|
30
|
+
from ..core import DataFrame
|
|
31
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
32
|
+
from ..utils import copy_func_scheduling_hints, gen_unknown_index_value, parse_index
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
36
|
+
_op_type_ = opcodes.FLATMAP
|
|
37
|
+
|
|
38
|
+
func = FunctionField("func")
|
|
39
|
+
raw = BoolField("raw", default=False)
|
|
40
|
+
args = TupleField("args", default=())
|
|
41
|
+
kwargs = DictField("kwargs", default={})
|
|
42
|
+
|
|
43
|
+
def __init__(self, output_types=None, **kw):
|
|
44
|
+
super().__init__(_output_types=output_types, **kw)
|
|
45
|
+
if hasattr(self, "func"):
|
|
46
|
+
copy_func_scheduling_hints(self.func, self)
|
|
47
|
+
|
|
48
|
+
def has_custom_code(self) -> bool:
|
|
49
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
50
|
+
|
|
51
|
+
def _call_dataframe(self, df: DataFrame, dtypes: pd.Series):
|
|
52
|
+
dtypes = make_dtypes(dtypes)
|
|
53
|
+
index_value = gen_unknown_index_value(
|
|
54
|
+
df.index_value,
|
|
55
|
+
(df.key, df.index_value.key, self.func),
|
|
56
|
+
normalize_range_index=True,
|
|
57
|
+
)
|
|
58
|
+
return self.new_dataframe(
|
|
59
|
+
[df],
|
|
60
|
+
shape=(np.nan, len(dtypes)),
|
|
61
|
+
index_value=index_value,
|
|
62
|
+
columns_value=parse_index(dtypes.index, store_data=True),
|
|
63
|
+
dtypes=dtypes,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def _call_series_or_index(self, series, dtypes=None):
|
|
67
|
+
index_value = gen_unknown_index_value(
|
|
68
|
+
series.index_value,
|
|
69
|
+
(series.key, series.index_value.key, self.func),
|
|
70
|
+
normalize_range_index=True,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
if self.output_types[0] == OutputType.series:
|
|
74
|
+
name, dtype = dtypes
|
|
75
|
+
return self.new_series(
|
|
76
|
+
[series],
|
|
77
|
+
dtype=dtype,
|
|
78
|
+
shape=(np.nan,),
|
|
79
|
+
index_value=index_value,
|
|
80
|
+
name=name,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
dtypes = make_dtypes(dtypes)
|
|
84
|
+
columns_value = parse_index(dtypes.index, store_data=True)
|
|
85
|
+
return self.new_dataframe(
|
|
86
|
+
[series],
|
|
87
|
+
shape=(np.nan, len(dtypes)),
|
|
88
|
+
index_value=index_value,
|
|
89
|
+
columns_value=columns_value,
|
|
90
|
+
dtypes=dtypes,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
def __call__(
|
|
94
|
+
self,
|
|
95
|
+
df_or_series,
|
|
96
|
+
dtypes=None,
|
|
97
|
+
output_type=None,
|
|
98
|
+
):
|
|
99
|
+
if df_or_series.op.output_types[0] == OutputType.dataframe:
|
|
100
|
+
return self._call_dataframe(df_or_series, dtypes=dtypes)
|
|
101
|
+
else:
|
|
102
|
+
return self._call_series_or_index(df_or_series, dtypes=dtypes)
|
|
103
|
+
|
|
104
|
+
@classmethod
|
|
105
|
+
def estimate_size(
|
|
106
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFlatMapOperator"
|
|
107
|
+
) -> None:
|
|
108
|
+
if isinstance(op.func, MarkedFunction):
|
|
109
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
110
|
+
super().estimate_size(ctx, op)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def df_flatmap(dataframe, func: Callable, dtypes=None, raw=False, args=(), **kwargs):
|
|
114
|
+
"""
|
|
115
|
+
Apply the given function to each row and then flatten results. Use this method if your transformation returns
|
|
116
|
+
multiple rows for each input row.
|
|
117
|
+
|
|
118
|
+
This function applies a transformation to each row of the DataFrame, where the transformation can return zero
|
|
119
|
+
or multiple values, effectively flattening Python generators, list-like collections, and DataFrames.
|
|
120
|
+
|
|
121
|
+
Parameters
|
|
122
|
+
----------
|
|
123
|
+
func : Callable
|
|
124
|
+
Function to apply to each row of the DataFrame. It should accept a Series (or an array if `raw=True`)
|
|
125
|
+
representing a row and return a list or iterable of values.
|
|
126
|
+
|
|
127
|
+
dtypes : Series, dict or list
|
|
128
|
+
Specify dtypes of returned DataFrame.
|
|
129
|
+
|
|
130
|
+
raw : bool, default False
|
|
131
|
+
Determines if the row is passed as a Series or as a numpy array:
|
|
132
|
+
|
|
133
|
+
* ``False`` : passes each row as a Series to the function.
|
|
134
|
+
* ``True`` : the passed function will receive numpy array objects instead.
|
|
135
|
+
|
|
136
|
+
args : tuple
|
|
137
|
+
Positional arguments to pass to `func`.
|
|
138
|
+
|
|
139
|
+
**kwargs
|
|
140
|
+
Additional keyword arguments to pass as keywords arguments to `func`.
|
|
141
|
+
|
|
142
|
+
Returns
|
|
143
|
+
-------
|
|
144
|
+
DataFrame
|
|
145
|
+
Return DataFrame with specified `dtypes`.
|
|
146
|
+
|
|
147
|
+
Notes
|
|
148
|
+
-----
|
|
149
|
+
The ``func`` must return an iterable of values for each input row. The index of the resulting DataFrame will be
|
|
150
|
+
repeated based on the number of output rows generated by `func`.
|
|
151
|
+
|
|
152
|
+
Examples
|
|
153
|
+
--------
|
|
154
|
+
>>> import numpy as np
|
|
155
|
+
>>> import maxframe.dataframe as md
|
|
156
|
+
>>> df = md.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
|
|
157
|
+
>>> df.execute()
|
|
158
|
+
A B
|
|
159
|
+
0 1 4
|
|
160
|
+
1 2 5
|
|
161
|
+
2 3 6
|
|
162
|
+
|
|
163
|
+
Define a function that takes a number and returns a list of two numbers:
|
|
164
|
+
|
|
165
|
+
>>> def generate_values_array(row):
|
|
166
|
+
... return [row['A'] * 2, row['B'] * 3]
|
|
167
|
+
|
|
168
|
+
Define a function that takes a row and return two rows and two columns:
|
|
169
|
+
|
|
170
|
+
>>> def generate_values_in_generator(row):
|
|
171
|
+
... yield [row[0] * 2, row[1] * 4]
|
|
172
|
+
... yield [row[0] * 3, row[1] * 5]
|
|
173
|
+
|
|
174
|
+
Which equals to the following function return a dataframe:
|
|
175
|
+
|
|
176
|
+
>>> def generate_values_in_dataframe(row):
|
|
177
|
+
... return pd.DataFrame([[row[0] * 2, row[1] * 4], [row[0] * 3, row[1] * 5]])
|
|
178
|
+
|
|
179
|
+
Specify `dtypes` with a function which returns a DataFrame:
|
|
180
|
+
|
|
181
|
+
>>> df.mf.flatmap(generate_values_array, dtypes=pd.Series({'A': 'int'})).execute()
|
|
182
|
+
A
|
|
183
|
+
0 2
|
|
184
|
+
0 12
|
|
185
|
+
1 4
|
|
186
|
+
1 15
|
|
187
|
+
2 6
|
|
188
|
+
2 18
|
|
189
|
+
|
|
190
|
+
Specify raw=True to pass input row as array:
|
|
191
|
+
|
|
192
|
+
>>> df.mf.flatmap(generate_values_in_generator, dtypes={"A": "int", "B": "int"}, raw=True).execute()
|
|
193
|
+
A B
|
|
194
|
+
0 2 16
|
|
195
|
+
0 3 20
|
|
196
|
+
1 4 20
|
|
197
|
+
1 6 25
|
|
198
|
+
2 6 24
|
|
199
|
+
2 9 30
|
|
200
|
+
"""
|
|
201
|
+
if dtypes is None or len(dtypes) == 0:
|
|
202
|
+
raise TypeError(
|
|
203
|
+
"Cannot determine {dtypes} by calculating with enumerate data, "
|
|
204
|
+
"please specify it as arguments"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
if not isinstance(func, Callable):
|
|
208
|
+
raise TypeError("function must be a callable object")
|
|
209
|
+
|
|
210
|
+
output_types = [OutputType.dataframe]
|
|
211
|
+
op = DataFrameFlatMapOperator(
|
|
212
|
+
func=func, raw=raw, output_types=output_types, args=args, kwargs=kwargs
|
|
213
|
+
)
|
|
214
|
+
return op(
|
|
215
|
+
dataframe,
|
|
216
|
+
dtypes=dtypes,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def series_flatmap(
|
|
221
|
+
series, func: Callable, dtypes=None, dtype=None, name=None, args=(), **kwargs
|
|
222
|
+
):
|
|
223
|
+
"""
|
|
224
|
+
Apply the given function to each row and then flatten results. Use this method if your transformation returns
|
|
225
|
+
multiple rows for each input row.
|
|
226
|
+
|
|
227
|
+
This function applies a transformation to each element of the Series, where the transformation can return zero
|
|
228
|
+
or multiple values, effectively flattening Python generator, list-liked collections and DataFrame.
|
|
229
|
+
|
|
230
|
+
Parameters
|
|
231
|
+
----------
|
|
232
|
+
func : Callable
|
|
233
|
+
Function to apply to each element of the Series. It should accept a scalar value
|
|
234
|
+
(or an array if ``raw=True``) and return a list or iterable of values.
|
|
235
|
+
|
|
236
|
+
dtypes : Series, default None
|
|
237
|
+
Specify dtypes of returned DataFrame. Can't work with dtype.
|
|
238
|
+
|
|
239
|
+
dtype : numpy.dtype, default None
|
|
240
|
+
Specify dtype of returned Series. Can't work with dtypes.
|
|
241
|
+
|
|
242
|
+
name : str, default None
|
|
243
|
+
Specify name of the returned Series.
|
|
244
|
+
|
|
245
|
+
args : tuple
|
|
246
|
+
Positional arguments to pass to ``func``.
|
|
247
|
+
|
|
248
|
+
**kwargs
|
|
249
|
+
Additional keyword arguments to pass as keywords arguments to ``func``.
|
|
250
|
+
|
|
251
|
+
Returns
|
|
252
|
+
-------
|
|
253
|
+
DataFrame or Series
|
|
254
|
+
Result of DataFrame when dtypes specified, else Series.
|
|
255
|
+
|
|
256
|
+
Notes
|
|
257
|
+
-----
|
|
258
|
+
The ``func`` must return an iterable of values for each input element. If ``dtypes`` is specified,
|
|
259
|
+
`flatmap` will return a DataFrame, if ``dtype`` and ``name`` is specified, a Series will be returned.
|
|
260
|
+
|
|
261
|
+
The index of the resulting DataFrame/Series will be repeated based on the number of output rows generated
|
|
262
|
+
by ``func``.
|
|
263
|
+
|
|
264
|
+
Examples
|
|
265
|
+
--------
|
|
266
|
+
>>> import numpy as np
|
|
267
|
+
>>> import maxframe.dataframe as md
|
|
268
|
+
>>> df = md.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
|
|
269
|
+
>>> df.execute()
|
|
270
|
+
A B
|
|
271
|
+
0 1 4
|
|
272
|
+
1 2 5
|
|
273
|
+
2 3 6
|
|
274
|
+
|
|
275
|
+
Define a function that takes a number and returns a list of two numbers:
|
|
276
|
+
|
|
277
|
+
>>> def generate_values_array(x):
|
|
278
|
+
... return [x * 2, x * 3]
|
|
279
|
+
|
|
280
|
+
Specify ``dtype`` with a function which returns list to return more elements as a Series:
|
|
281
|
+
|
|
282
|
+
>>> df['A'].mf.flatmap(generate_values_array, dtype="int", name="C").execute()
|
|
283
|
+
0 2
|
|
284
|
+
0 3
|
|
285
|
+
1 4
|
|
286
|
+
1 6
|
|
287
|
+
2 6
|
|
288
|
+
2 9
|
|
289
|
+
Name: C, dtype: int64
|
|
290
|
+
|
|
291
|
+
Specify ``dtypes`` to return multi columns as a DataFrame:
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
>>> def generate_values_in_generator(x):
|
|
295
|
+
... yield pd.Series([x * 2, x * 4])
|
|
296
|
+
... yield pd.Series([x * 3, x * 5])
|
|
297
|
+
|
|
298
|
+
>>> df['A'].mf.flatmap(generate_values_in_generator, dtypes={"A": "int", "B": "int"}).execute()
|
|
299
|
+
A B
|
|
300
|
+
0 2 4
|
|
301
|
+
0 3 5
|
|
302
|
+
1 4 8
|
|
303
|
+
1 6 10
|
|
304
|
+
2 6 12
|
|
305
|
+
2 9 15
|
|
306
|
+
"""
|
|
307
|
+
|
|
308
|
+
if dtypes is not None and dtype is not None:
|
|
309
|
+
raise ValueError("Both dtypes and dtype cannot be specified at the same time.")
|
|
310
|
+
|
|
311
|
+
dtypes = (name, dtype) if dtype is not None else dtypes
|
|
312
|
+
if dtypes is None:
|
|
313
|
+
raise TypeError(
|
|
314
|
+
"Cannot determine {dtypes} or {dtype} by calculating with enumerate data, "
|
|
315
|
+
"please specify it as arguments"
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
if not isinstance(func, Callable):
|
|
319
|
+
raise TypeError("function must be a callable object")
|
|
320
|
+
|
|
321
|
+
output_type = OutputType.series if dtype is not None else OutputType.dataframe
|
|
322
|
+
|
|
323
|
+
op = DataFrameFlatMapOperator(
|
|
324
|
+
func=func, raw=False, output_types=[output_type], args=args, kwargs=kwargs
|
|
325
|
+
)
|
|
326
|
+
return op(
|
|
327
|
+
series,
|
|
328
|
+
dtypes=dtypes,
|
|
329
|
+
)
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import inspect
|
|
16
|
+
from typing import Any, Callable, List, Optional, Union
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _has_end_arg(func) -> bool:
|
|
22
|
+
f_args = inspect.getfullargspec(func)
|
|
23
|
+
return "end" in f_args.args or "end" in f_args.kwonlyargs
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _gen_combined_mapper(
|
|
27
|
+
mapper: Callable,
|
|
28
|
+
combiner: Callable,
|
|
29
|
+
group_cols: List[Any],
|
|
30
|
+
order_cols: List[Any],
|
|
31
|
+
ascending: Union[bool, List[bool]] = True,
|
|
32
|
+
):
|
|
33
|
+
class CombinedMapper:
|
|
34
|
+
def __init__(self):
|
|
35
|
+
if isinstance(mapper, type):
|
|
36
|
+
self.f = mapper()
|
|
37
|
+
else:
|
|
38
|
+
self.f = mapper
|
|
39
|
+
|
|
40
|
+
if isinstance(combiner, type):
|
|
41
|
+
self.combiner = combiner()
|
|
42
|
+
else:
|
|
43
|
+
self.combiner = combiner
|
|
44
|
+
|
|
45
|
+
def _combine_mapper_result(self, mapper_result, end=False):
|
|
46
|
+
if mapper_result is None:
|
|
47
|
+
return None
|
|
48
|
+
res = mapper_result
|
|
49
|
+
if order_cols:
|
|
50
|
+
res = mapper_result.sort_values(order_cols, ascending=ascending)
|
|
51
|
+
|
|
52
|
+
kw = {"end": end} if _has_end_arg(self.combiner) else {}
|
|
53
|
+
gcols = group_cols or list(res.columns)
|
|
54
|
+
return res.groupby(gcols, group_keys=False)[list(res.columns)].apply(
|
|
55
|
+
self.combiner, **kw
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
def __call__(self, batch, end=False):
|
|
59
|
+
kw = {"end": end} if _has_end_arg(self.f) else {}
|
|
60
|
+
f_ret = self.f(batch, **kw)
|
|
61
|
+
return self._combine_mapper_result(f_ret, end=end)
|
|
62
|
+
|
|
63
|
+
def close(self) -> None:
|
|
64
|
+
if hasattr(self.f, "close"):
|
|
65
|
+
self.f.close()
|
|
66
|
+
if hasattr(self.combiner, "close"):
|
|
67
|
+
self.combiner.close()
|
|
68
|
+
|
|
69
|
+
return CombinedMapper
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def map_reduce(
|
|
73
|
+
df,
|
|
74
|
+
mapper: Optional[Callable] = None,
|
|
75
|
+
reducer: Optional[Callable] = None,
|
|
76
|
+
group_cols: Optional[List[Any]] = None,
|
|
77
|
+
*,
|
|
78
|
+
order_cols: List[Any] = None,
|
|
79
|
+
ascending: Union[bool, List[bool]] = True,
|
|
80
|
+
combiner: Callable = None,
|
|
81
|
+
batch_rows: Optional[int] = 1024,
|
|
82
|
+
mapper_dtypes: pd.Series = None,
|
|
83
|
+
mapper_index: pd.Index = None,
|
|
84
|
+
mapper_batch_rows: Optional[int] = None,
|
|
85
|
+
reducer_dtypes: pd.Series = None,
|
|
86
|
+
reducer_index: pd.Index = None,
|
|
87
|
+
reducer_batch_rows: Optional[int] = None,
|
|
88
|
+
ignore_index: bool = False,
|
|
89
|
+
):
|
|
90
|
+
"""
|
|
91
|
+
Map-reduce API over certain DataFrames. This function is roughly
|
|
92
|
+
a shortcut for
|
|
93
|
+
|
|
94
|
+
.. code-block:: python
|
|
95
|
+
|
|
96
|
+
df.mf.apply_chunk(mapper).groupby(group_keys).mf.apply_chunk(reducer)
|
|
97
|
+
|
|
98
|
+
Parameters
|
|
99
|
+
----------
|
|
100
|
+
mapper : function or type
|
|
101
|
+
Mapper function or class.
|
|
102
|
+
reducer : function or type
|
|
103
|
+
Reducer function or class.
|
|
104
|
+
group_cols : str or list[str]
|
|
105
|
+
The keys to group after mapper. If absent, all columns in the mapped
|
|
106
|
+
DataFrame will be used.
|
|
107
|
+
order_cols : str or list[str]
|
|
108
|
+
The columns to sort after groupby.
|
|
109
|
+
ascending : bool or list[bool] or None
|
|
110
|
+
Whether columns should be in ascending order or not, only effective when
|
|
111
|
+
`order_cols` are specified. If a list of booleans are passed, orders of
|
|
112
|
+
every column in `order_cols` are specified.
|
|
113
|
+
combiner : function or class
|
|
114
|
+
Combiner function or class. Should accept and returns the same schema
|
|
115
|
+
of mapper outputs.
|
|
116
|
+
batch_rows : int or None
|
|
117
|
+
Rows in batches for mappers and reducers. Ignored if `mapper_batch_rows`
|
|
118
|
+
specified for mappers or `reducer_batch_rows` specified for reducers.
|
|
119
|
+
1024 by default.
|
|
120
|
+
mapper_dtypes : pd.Series or dict or None
|
|
121
|
+
Output dtypes of mapper stage.
|
|
122
|
+
mapper_index : pd.Index or None
|
|
123
|
+
Index of DataFrame returned by mappers.
|
|
124
|
+
mapper_batch_rows : int or None
|
|
125
|
+
Rows in batches for mappers. If specified, `batch_rows` will be ignored
|
|
126
|
+
for mappers.
|
|
127
|
+
reducer_dtypes : pd.Series or dict or None
|
|
128
|
+
Output dtypes of reducer stage.
|
|
129
|
+
reducer_index : pd.Index or None
|
|
130
|
+
Index of DataFrame returned by reducers.
|
|
131
|
+
reducer_batch_rows : int or None
|
|
132
|
+
Rows in batches for mappers. If specified, `batch_rows` will be ignored
|
|
133
|
+
for reducers.
|
|
134
|
+
ignore_index : bool
|
|
135
|
+
If true, indexes generated at mapper or reducer functions will be ignored.
|
|
136
|
+
|
|
137
|
+
Returns
|
|
138
|
+
-------
|
|
139
|
+
output: DataFrame
|
|
140
|
+
Result DataFrame after map and reduce.
|
|
141
|
+
|
|
142
|
+
Examples
|
|
143
|
+
--------
|
|
144
|
+
|
|
145
|
+
We first define a DataFrame with a column of several words.
|
|
146
|
+
|
|
147
|
+
>>> from collections import defaultdict
|
|
148
|
+
>>> import maxframe.dataframe as md
|
|
149
|
+
>>> from maxframe.udf import with_running_options
|
|
150
|
+
>>> df = pd.DataFrame(
|
|
151
|
+
>>> {
|
|
152
|
+
>>> "name": ["name key", "name", "key", "name", "key name"],
|
|
153
|
+
>>> "id": [4, 2, 4, 3, 3],
|
|
154
|
+
>>> "fid": [5.3, 3.5, 4.2, 2.2, 4.1],
|
|
155
|
+
>>> }
|
|
156
|
+
>>> )
|
|
157
|
+
|
|
158
|
+
Then we write a mapper function which accepts batches in the DataFrame
|
|
159
|
+
and returns counts of words in every row.
|
|
160
|
+
|
|
161
|
+
>>> def mapper(batch):
|
|
162
|
+
>>> word_to_count = defaultdict(lambda: 0)
|
|
163
|
+
>>> for words in batch["name"]:
|
|
164
|
+
>>> for w in words.split():
|
|
165
|
+
>>> word_to_count[w] += 1
|
|
166
|
+
>>> return pd.DataFrame(
|
|
167
|
+
>>> [list(tp) for tp in word_to_count.items()], columns=["word", "count"]
|
|
168
|
+
>>> )
|
|
169
|
+
|
|
170
|
+
After that we write a reducer function which aggregates records with
|
|
171
|
+
the same word. Running options such as CPU specifications can be supplied
|
|
172
|
+
as well.
|
|
173
|
+
|
|
174
|
+
>>> @with_running_options(cpu=2)
|
|
175
|
+
>>> class TestReducer:
|
|
176
|
+
>>> def __init__(self):
|
|
177
|
+
>>> self._word_to_count = defaultdict(lambda: 0)
|
|
178
|
+
>>>
|
|
179
|
+
>>> def __call__(self, batch, end=False):
|
|
180
|
+
>>> word = None
|
|
181
|
+
>>> for _, row in batch.iterrows():
|
|
182
|
+
>>> word = row.iloc[0]
|
|
183
|
+
>>> self._word_to_count[row.iloc[0]] += row.iloc[1]
|
|
184
|
+
>>> if end:
|
|
185
|
+
>>> return pd.DataFrame(
|
|
186
|
+
>>> [[word, self._word_to_count[word]]], columns=["word", "count"]
|
|
187
|
+
>>> )
|
|
188
|
+
>>>
|
|
189
|
+
>>> def close(self):
|
|
190
|
+
>>> # you can do several cleanups here
|
|
191
|
+
>>> print("close")
|
|
192
|
+
|
|
193
|
+
Finally we can call `map_reduce` with mappers and reducers specified above.
|
|
194
|
+
|
|
195
|
+
>>> res = df.mf.map_reduce(
|
|
196
|
+
>>> mapper,
|
|
197
|
+
>>> TestReducer,
|
|
198
|
+
>>> group_cols=["word"],
|
|
199
|
+
>>> mapper_dtypes={"word": "str", "count": "int"},
|
|
200
|
+
>>> mapper_index=pd.Index([0]),
|
|
201
|
+
>>> reducer_dtypes={"word": "str", "count": "int"},
|
|
202
|
+
>>> reducer_index=pd.Index([0]),
|
|
203
|
+
>>> ignore_index=True,
|
|
204
|
+
>>> )
|
|
205
|
+
>>> res.execute().fetch()
|
|
206
|
+
word count
|
|
207
|
+
0 key 3
|
|
208
|
+
1 name 4
|
|
209
|
+
|
|
210
|
+
See Also
|
|
211
|
+
--------
|
|
212
|
+
DataFrame.mf.apply_chunk, DataFrame.groupby.mf.apply_chunk
|
|
213
|
+
"""
|
|
214
|
+
mapper_batch_rows = mapper_batch_rows or batch_rows
|
|
215
|
+
reducer_batch_rows = reducer_batch_rows or batch_rows
|
|
216
|
+
|
|
217
|
+
def check_arg(arg_type, locals_):
|
|
218
|
+
if locals_.get(arg_type) is not None:
|
|
219
|
+
return
|
|
220
|
+
for suffix in ("dtypes", "index"):
|
|
221
|
+
arg_name = f"{arg_type}_{suffix}"
|
|
222
|
+
if locals_.get(arg_name) is not None:
|
|
223
|
+
raise ValueError(f"Cannot specify {arg_name} when {arg_type} is None")
|
|
224
|
+
|
|
225
|
+
if mapper is None:
|
|
226
|
+
check_arg("mapper", locals())
|
|
227
|
+
mapped = df
|
|
228
|
+
group_cols = group_cols or df.dtypes.index
|
|
229
|
+
if combiner is not None:
|
|
230
|
+
raise ValueError("Combiner cannot be set when mapper is None")
|
|
231
|
+
else:
|
|
232
|
+
if combiner is not None:
|
|
233
|
+
mapper = _gen_combined_mapper(
|
|
234
|
+
mapper, combiner, group_cols, order_cols, ascending=ascending
|
|
235
|
+
)
|
|
236
|
+
mapped = df.mf.apply_chunk(
|
|
237
|
+
mapper,
|
|
238
|
+
batch_rows=mapper_batch_rows,
|
|
239
|
+
dtypes=mapper_dtypes,
|
|
240
|
+
output_type="dataframe",
|
|
241
|
+
index=mapper_index,
|
|
242
|
+
)
|
|
243
|
+
group_cols = group_cols or list(df.dtypes.index)
|
|
244
|
+
|
|
245
|
+
if reducer is None:
|
|
246
|
+
check_arg("reducer", locals())
|
|
247
|
+
res = mapped
|
|
248
|
+
else:
|
|
249
|
+
res = mapped.groupby(group_cols, group_keys=False)[
|
|
250
|
+
list(mapped.dtypes.index)
|
|
251
|
+
].mf.apply_chunk(
|
|
252
|
+
reducer,
|
|
253
|
+
batch_rows=reducer_batch_rows,
|
|
254
|
+
dtypes=reducer_dtypes,
|
|
255
|
+
output_type="dataframe",
|
|
256
|
+
index=reducer_index,
|
|
257
|
+
order_cols=order_cols,
|
|
258
|
+
ascending=ascending,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
if ignore_index:
|
|
262
|
+
return res.reset_index(drop=True)
|
|
263
|
+
return res
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import Float64Field, Int64Field
|
|
17
|
+
from ...tensor.extensions.rebalance import RebalanceMixin
|
|
18
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
from ..utils import validate_axis
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameRebalance(RebalanceMixin, DataFrameOperatorMixin, DataFrameOperator):
|
|
23
|
+
_op_type_ = opcodes.REBALANCE
|
|
24
|
+
|
|
25
|
+
axis = Int64Field("axis")
|
|
26
|
+
factor = Float64Field("factor", default=None)
|
|
27
|
+
num_partitions = Int64Field("num_partitions")
|
|
28
|
+
|
|
29
|
+
def __init__(self, output_types=None, **kw):
|
|
30
|
+
super().__init__(_output_types=output_types, **kw)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def rebalance(df_or_series, axis=0, factor=None, num_partitions=None):
|
|
34
|
+
"""
|
|
35
|
+
Make data more balanced across entire cluster.
|
|
36
|
+
|
|
37
|
+
Parameters
|
|
38
|
+
----------
|
|
39
|
+
axis : int
|
|
40
|
+
The axis to rebalance.
|
|
41
|
+
factor : float
|
|
42
|
+
Specified so that number of chunks after balance is
|
|
43
|
+
total number of input chunks * factor.
|
|
44
|
+
num_partitions : int
|
|
45
|
+
Specified so the number of chunks are at most
|
|
46
|
+
num_partitions.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
Series or DataFrame
|
|
51
|
+
Result of DataFrame or Series after rebalanced.
|
|
52
|
+
"""
|
|
53
|
+
axis = validate_axis(axis, df_or_series)
|
|
54
|
+
if num_partitions is None and factor is None:
|
|
55
|
+
raise ValueError("Need to specify num_partitions or factor")
|
|
56
|
+
if num_partitions is not None and factor is not None:
|
|
57
|
+
raise ValueError(
|
|
58
|
+
"num_partitions and factor cannot be specified at the same time"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
op = DataFrameRebalance(axis=axis, factor=factor, num_partitions=num_partitions)
|
|
62
|
+
return op(df_or_series)
|