maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
from typing import Any, Callable, Dict, List, MutableMapping, Tuple, Union
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from ... import opcodes
|
|
22
|
+
from ...core import OutputType
|
|
23
|
+
from ...serialization.serializables import (
|
|
24
|
+
DictField,
|
|
25
|
+
FunctionField,
|
|
26
|
+
Int32Field,
|
|
27
|
+
TupleField,
|
|
28
|
+
)
|
|
29
|
+
from ...typing_ import TileableType
|
|
30
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
31
|
+
from ...utils import copy_if_possible, make_dtype, make_dtypes
|
|
32
|
+
from ..core import DATAFRAME_TYPE, INDEX_TYPE, DataFrame, IndexValue, Series
|
|
33
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
34
|
+
from ..utils import (
|
|
35
|
+
InferredDataFrameMeta,
|
|
36
|
+
build_df,
|
|
37
|
+
copy_func_scheduling_hints,
|
|
38
|
+
infer_dataframe_return_value,
|
|
39
|
+
pack_func_args,
|
|
40
|
+
parse_index,
|
|
41
|
+
validate_output_types,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator backing ``apply_chunk`` on DataFrames and Series.

    It wraps a user function that is invoked on batches of rows of the input
    and produces a DataFrame or a Series. The output metadata (output type,
    dtypes, index) is either supplied by the caller or inferred by running
    the function against a small stub of the input.
    """

    _op_type_ = opcodes.APPLY_CHUNK
    _legacy_name = "DataFrameApplyChunkOperator"  # since v2.0.0

    # function applied to every batch
    func = FunctionField("func")
    # requested number of rows per batch; None when not specified
    batch_rows = Int32Field("batch_rows", default=None)
    # extra positional arguments forwarded to ``func``
    args = TupleField("args", default=None)
    # extra keyword arguments forwarded to ``func``
    kwargs = DictField("kwargs", default=None)

    def __init__(self, output_type=None, **kw):
        """
        Create the operator.

        Parameters
        ----------
        output_type : OutputType, optional
            When truthy, it is stored as the only entry of ``_output_types``.
        **kw
            Field values forwarded to the base operator constructor.
        """
        if output_type:
            kw["_output_types"] = [output_type]
        super().__init__(**kw)
        if hasattr(self, "func"):
            # NOTE(review): presumably transfers scheduling hints declared on
            # the function onto this operator -- confirm against the helper.
            copy_func_scheduling_hints(self.func, self)

    def has_custom_code(self) -> bool:
        """Return True unless ``func`` is a ``BuiltinFunction``."""
        return not isinstance(self.func, BuiltinFunction)

    def check_inputs(self, inputs: List[TileableType]):
        """Accept any inputs without validation."""
        # apply_chunk may be called on non-deterministic tileables, hence the
        # input check is deliberately a no-op.
        pass

    def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
        """
        Build the output tileable for a DataFrame input.

        Parameters
        ----------
        df
            Input DataFrame tileable.
        dtypes
            Output column dtypes; may be None when they are unknown.
        dtype
            Output dtype, used when the result is a Series.
        name
            Output name, used when the result is a Series.
        index_value
            Index value of the output.
        element_wise : bool
            When truthy and dtypes are known, the output reuses ``df.shape``.

        Returns
        -------
        A DataFrame when ``self.output_types[0]`` is ``OutputType.dataframe``,
        otherwise a Series.
        """
        if self.output_types[0] == OutputType.dataframe:
            dtypes = make_dtypes(dtypes)
            if dtypes is not None:
                shape = df.shape if element_wise else (np.nan, len(dtypes))
                cols_value = parse_index(dtypes.index, store_data=True)
            else:
                # nothing is known about the columns of the result
                shape = (np.nan, np.nan)
                cols_value = None
            return self.new_dataframe(
                [df],
                shape=shape,
                index_value=index_value,
                columns_value=cols_value,
                dtypes=dtypes,
            )

        # series output: the number of rows is unknown
        return self.new_series(
            [df], shape=(np.nan,), name=name, dtype=dtype, index_value=index_value
        )

    def _call_series(self, series, dtypes, dtype, name, index_value, element_wise):
        """
        Build the output tileable for a Series input.

        Parameters
        ----------
        series
            Input Series tileable.
        dtypes
            Output column dtypes, used when the result is a DataFrame; may be
            None when they are unknown.
        dtype
            Output dtype, used when the result is a Series.
        name
            Output name, used when the result is a Series.
        index_value
            Index value of the output.
        element_wise : bool
            When truthy, a Series output reuses ``series.shape``.

        Returns
        -------
        A Series when ``self.output_types[0]`` is ``OutputType.series``,
        otherwise a DataFrame.
        """
        if self.output_types[0] == OutputType.series:
            shape = series.shape if element_wise else (np.nan,)
            return self.new_series(
                [series],
                dtype=dtype,
                shape=shape,
                index_value=index_value,
                name=name,
            )

        dtypes = make_dtypes(dtypes)
        if dtypes is not None:
            shape = (np.nan, len(dtypes))
            cols_value = parse_index(dtypes.index, store_data=True)
        else:
            # mirror _call_dataframe: unknown dtypes mean unknown columns
            # instead of failing on len(None)
            shape = (np.nan, np.nan)
            cols_value = None
        return self.new_dataframe(
            [series],
            shape=shape,
            index_value=index_value,
            columns_value=cols_value,
            dtypes=dtypes,
        )

    def __call__(
        self,
        df_or_series: Union[DataFrame, Series],
        dtypes: Union[Tuple[str, Any], Dict[str, Any]] = None,
        dtype: Any = None,
        name: Any = None,
        output_type=None,
        index=None,
        skip_infer=False,
    ):
        """
        Apply the operator to ``df_or_series`` and return the output tileable.

        Parameters
        ----------
        df_or_series : DataFrame or Series
            Input tileable.
        dtypes, dtype, name, output_type, index
            Caller-specified output metadata; forwarded to the inference step.
        skip_infer : bool
            When True, a failure to pack the function arguments is tolerated
            and the raw function is used instead, and the final check for
            missing metadata is skipped.

        Returns
        -------
        The resulting DataFrame, Series or df-or-series tileable.
        """
        args = self.args or ()
        kwargs = self.kwargs or {}
        try:
            packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
        except Exception:
            # Only ordinary errors are tolerated; KeyboardInterrupt and
            # SystemExit must always propagate.
            if not skip_infer:
                raise
            packed_func = self.func

        # the output kind is left undecided: no metadata needs to be resolved
        if self.output_types and self.output_types[0] == OutputType.df_or_series:
            return self.new_df_or_series([df_or_series])

        # infer return index and dtypes
        inferred_meta = self._infer_batch_func_returns(
            df_or_series,
            packed_func=packed_func,
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index=index,
            skip_infer=skip_infer,
        )

        if inferred_meta.index_value is None:
            # no index known: derive one from the keys of the input and the
            # applied function
            inferred_meta.index_value = parse_index(
                None, (df_or_series.key, df_or_series.index_value.key, self.func)
            )
        if not skip_infer:
            # NOTE(review): presumably raises when any of these fields could
            # not be determined -- confirm against InferredDataFrameMeta.
            inferred_meta.check_absence("output_type", "dtypes", "dtype")

        build_output = (
            self._call_dataframe
            if isinstance(df_or_series, DATAFRAME_TYPE)
            else self._call_series
        )
        return build_output(
            df_or_series,
            dtypes=inferred_meta.dtypes,
            dtype=inferred_meta.dtype,
            name=inferred_meta.name,
            index_value=inferred_meta.index_value,
            element_wise=inferred_meta.elementwise,
        )

    def _infer_batch_func_returns(
        self,
        input_df_or_series: Union[DataFrame, Series],
        packed_func: Union[Callable, functools.partial],
        output_type: OutputType,
        *args,
        dtypes: Union[pd.Series, List[Any], Dict[str, Any]] = None,
        dtype: Any = None,
        name: Any = None,
        index: Union[pd.Index, IndexValue] = None,
        elementwise: bool = None,
        skip_infer: bool = False,
        **kwargs,
    ) -> InferredDataFrameMeta:
        """
        Infer the output metadata and merge it with caller-specified values.

        Parameters
        ----------
        input_df_or_series : DataFrame or Series
            Input tileable.
        packed_func : callable
            Function to infer from; ``*args`` and ``**kwargs`` are bound to
            it with ``functools.partial``.
        output_type, dtypes, dtype, name, index, elementwise
            Caller-specified metadata handed to the inference helper.
        skip_infer : bool
            When True, the result of the helper is returned without merging.

        Returns
        -------
        InferredDataFrameMeta
            The merged metadata. As a side effect ``self.output_types`` is
            set from the inferred output type when it was empty before.
        """
        inferred_meta = infer_dataframe_return_value(
            input_df_or_series,
            functools.partial(packed_func, *args, **kwargs),
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index=index,
            elementwise=elementwise,
            skip_infer=skip_infer,
        )
        if skip_infer:
            return inferred_meta

        # An output type already set on the operator wins over the inferred one.
        if not self.output_types and inferred_meta.output_type:
            self.output_types = [inferred_meta.output_type]
        if self.output_types:
            inferred_meta.output_type = self.output_types[0]

        # explicitly specified dtypes win over inferred ones
        if dtypes is not None:
            inferred_meta.dtypes = dtypes

        if isinstance(index, INDEX_TYPE):
            index = index.index_value
        if index is not None:
            # reuse the input's index value object when it is the one given,
            # otherwise parse the specified index
            if index is input_df_or_series.index_value:
                inferred_meta.index_value = input_df_or_series.index_value
            else:
                inferred_meta.index_value = parse_index(index)

        # elementwise is later used to decide the output shape
        inferred_meta.elementwise = elementwise or inferred_meta.elementwise
        return inferred_meta

    @classmethod
    def estimate_size(
        cls,
        ctx: MutableMapping[str, Union[int, float]],
        op: "DataFrameApplyChunk",
    ) -> None:
        """
        Record a size estimation for the output of ``op`` in ``ctx``.

        When ``op.func`` is a ``MarkedFunction`` the first output is recorded
        as infinite before delegating to the base implementation.
        """
        if isinstance(op.func, MarkedFunction):
            ctx[op.outputs[0].key] = float("inf")
        super().estimate_size(ctx, op)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Alias kept for import compatibility: code importing
# ``DataFrameApplyChunkOperator`` receives ``DataFrameApplyChunk``.
DataFrameApplyChunkOperator = DataFrameApplyChunk
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def get_packed_func(df, func, *args, **kwargs) -> Any:
    """
    Pack ``func`` with its arguments against a stub built from ``df``.

    A stub object is built from ``df`` via ``build_df`` (``fill_value=1``,
    ``size=1``); ``args`` and ``kwargs`` are passed through
    ``copy_if_possible`` and the copies, not the originals, are handed to
    ``pack_func_args`` together with the stub and ``func``.

    Returns
    -------
    Any
        Whatever ``pack_func_args`` returns.
    """
    mock_frame = build_df(df, fill_value=1, size=1)
    copied_args = copy_if_possible(args)
    copied_kwargs = copy_if_possible(kwargs)
    packed = pack_func_args(mock_frame, func, *copied_args, **copied_kwargs)
    return packed
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def df_apply_chunk(
    dataframe,
    func: Union[str, Callable],
    batch_rows=None,
    dtypes=None,
    dtype=None,
    name=None,
    output_type=None,
    index=None,
    skip_infer=False,
    args=(),
    **kwargs,
):
    """
    Apply a function that takes pandas DataFrame and outputs pandas DataFrame/Series.
    The pandas DataFrame given to the function is a chunk of the input dataframe,
    considered as a batch of rows.

    The objects passed into this function are slices of the original DataFrame,
    containing at most ``batch_rows`` rows and all columns. It is equivalent to
    merging multiple ``df.apply`` with ``axis=1`` inputs and then passing them
    into the function for execution, thereby improving performance in specific
    scenarios. The function output can be either a DataFrame or a Series.
    ``apply_chunk`` will ultimately merge the results into a new DataFrame or
    Series.

    Don't expect to receive all rows of the DataFrame in the function, as it
    depends on the implementation of MaxFrame and the internal running state of
    MaxCompute.

    Parameters
    ----------
    func : Callable
        Function to apply to the dataframe chunk. Non-callable values
        (including strings) are rejected with ``TypeError``.

    batch_rows : int
        Specify expected number of rows in a batch, as well as the len of
        function input dataframe. When the remaining data is insufficient, it
        may be less than this number.

    output_type : {'dataframe', 'series'}, default None
        Specify type of returned object. See `Notes` for more details.

    dtypes : Series, default None
        Specify dtypes of returned DataFrames. See `Notes` for more details.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. See `Notes` for more details.

    name : str, default None
        Specify name of returned Series. See `Notes` for more details.

    index : Index, default None
        Specify index of returned object. See `Notes` for more details.

    skip_infer : bool, default False
        Whether to skip inferring dtypes when dtypes or output_type is not
        specified.

    args : tuple
        Positional arguments to pass to ``func`` in addition to the
        array/series.

    **kwds
        Additional keyword arguments to pass as keywords arguments to
        ``func``. The keys ``output_types`` and ``object_type`` are consumed
        by this function and are not forwarded to ``func``.

    Returns
    -------
    Series or DataFrame
        Result of applying ``func`` along the given chunk of the
        DataFrame.

    Raises
    ------
    TypeError
        If ``func`` is not callable, or ``batch_rows`` is not an integer.
    ValueError
        If ``batch_rows`` is not greater than 0.

    See Also
    --------
    DataFrame.apply: For non-batching operations.
    Series.mf.apply_chunk: Apply function to Series chunk.

    Notes
    -----
    When deciding output dtypes and shape of the return value, MaxFrame will
    try applying ``func`` onto a mock DataFrame, and the apply call may
    fail. When this happens, you need to specify the type of apply call
    (DataFrame or Series) in output_type.

    * For DataFrame output, you need to specify a list or a pandas Series
      as ``dtypes`` of output DataFrame. ``index`` of output can also be
      specified.
    * For Series output, you need to specify ``dtype`` and ``name`` of
      output Series.
    * For any input with data type ``pandas.ArrowDtype(pyarrow.MapType)``, it will always
      be converted to a Python dict. And for any output with this data type, it must be
      returned as a Python dict as well.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[4, 9]] * 3, columns=['A', 'B'])
    >>> df.execute()
       A  B
    0  4  9
    1  4  9
    2  4  9

    Using different ``batch_rows`` will collect different dataframe chunks into
    the function.

    For example, when you use ``batch_rows=3``, it means that the function will
    wait until 3 rows are collected.

    >>> df.mf.apply_chunk(np.sum, batch_rows=3).execute()
    A    12
    B    27
    dtype: int64

    While, if ``batch_rows=2``, the data will be divided into at least two
    segments. Additionally, if your function alters the shape of the dataframe,
    it may result in different outputs.

    >>> df.mf.apply_chunk(np.sum, batch_rows=2).execute()
    A     8
    B    18
    A     4
    B     9
    dtype: int64

    If the function requires some parameters, you can specify them using args
    or kwargs.

    >>> def calc(df, x, y):
    ...    return df * x + y
    >>> df.mf.apply_chunk(calc, args=(10,), y=20).execute()
        A    B
    0  60  110
    1  60  110
    2  60  110

    The batch rows will benefit the actions that consume a dataframe, like
    sklearn predict. You can easily use sklearn in MaxFrame to perform offline
    inference, and apply_chunk makes this process more efficient. The
    ``@with_python_requirements`` provides the capability to automatically
    package and load dependencies.

    Once you rely on some third-party dependencies, MaxFrame may not be able to
    correctly infer the return type. Therefore, using ``output_type`` with
    ``dtype`` or ``dtypes`` is necessary.

    >>> from maxframe.udf import with_python_requirements
    >>> data = {
    ...     'A': np.random.rand(10),
    ...     'B': np.random.rand(10)
    ... }
    >>> pd_df = pd.DataFrame(data)
    >>> X = pd_df[['A']]
    >>> y = pd_df['B']

    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.linear_model import LinearRegression
    >>> model = LinearRegression()
    >>> model.fit(X, y)

    >>> @with_python_requirements("scikit-learn")
    ... def predict(df):
    ...     predict_B = model.predict(df[["A"]])
    ...     return pd.Series(predict_B, index=df.A.index)

    >>> df.mf.apply_chunk(predict, batch_rows=3, output_type="series", dtype="float", name="predict_B").execute()
    0   -0.765025
    1   -0.765025
    2   -0.765025
    Name: predict_B, dtype: float64

    Create a dataframe with a dict type.

    >>> import pyarrow as pa
    >>> from maxframe.lib.dtypes_extension import dict_
    >>> col_a = pd.Series(
    ...     data=[[("k1", 1), ("k2", 2)], [("k1", 3)], None],
    ...     index=[1, 2, 3],
    ...     dtype=dict_(pa.string(), pa.int64()),
    ... )
    >>> col_b = pd.Series(
    ...     data=["A", "B", "C"],
    ...     index=[1, 2, 3],
    ... )
    >>> df = md.DataFrame({"A": col_a, "B": col_b})
    >>> df.execute()
                            A  B
    1  [('k1', 1), ('k2', 2)]  A
    2             [('k1', 3)]  B
    3                    <NA>  C

    Define a function that updates the map type with a new key-value pair in a batch.

    >>> def custom_set_item(df):
    ...     for name, value in df["A"].items():
    ...         if value is not None:
    ...             df["A"][name]["x"] = 100
    ...     return df

    >>> df.mf.apply_chunk(
    ...     custom_set_item,
    ...     output_type="dataframe",
    ...     dtypes=df.dtypes.copy(),
    ...     batch_rows=2,
    ...     skip_infer=True,
    ...     index=df.index,
    ... ).execute()
                                        A  B
    1  [('k1', 1), ('k2', 2), ('x', 100)]  A
    2             [('k1', 3), ('x', 100)]  B
    3                                <NA>  C
    """
    # Validate arguments up front so misuse fails before any operator is built.
    if not callable(func):
        raise TypeError("function must be a callable object")

    if batch_rows is not None:
        if not isinstance(batch_rows, int):
            raise TypeError("batch_rows must be an integer")
        if batch_rows <= 0:
            raise ValueError("batch_rows must be greater than 0")

    if dtype is not None:
        dtype = make_dtype(dtype)

    # ``output_types`` / ``object_type`` are alternative spellings of
    # ``output_type``; pop them so they are not passed on to ``func``.
    output_types = kwargs.pop("output_types", None)
    object_type = kwargs.pop("object_type", None)
    output_types = validate_output_types(
        output_type=output_type, output_types=output_types, object_type=object_type
    )
    output_type = output_types[0] if output_types else None
    if skip_infer and output_type is None:
        # without inference and without a hint, the result kind is left open
        output_type = OutputType.df_or_series

    # bind args and kwargs
    op = DataFrameApplyChunk(
        func=func,
        batch_rows=batch_rows,
        output_type=output_type,
        args=args,
        kwargs=kwargs,
    )

    return op(
        dataframe,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
        index=index,
        output_type=output_type,
        skip_infer=skip_infer,
    )
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def series_apply_chunk(
    dataframe_or_series,
    func: Union[str, Callable],
    batch_rows=None,
    dtypes=None,
    dtype=None,
    name=None,
    output_type=None,
    index=None,
    skip_infer=False,
    args=(),
    **kwargs,
):
    """
    Apply a function that takes pandas Series and outputs pandas DataFrame/Series.
    The pandas Series given to the function is a chunk of the input series.

    The objects passed into this function are slices of the original series,
    containing at most ``batch_rows`` elements. The function output can be
    either a DataFrame or a Series. ``apply_chunk`` will ultimately merge the
    results into a new DataFrame or Series.

    Don't expect to receive all elements of series in the function, as it
    depends on the implementation of MaxFrame and the internal running state of
    MaxCompute.

    Can be ufunc (a NumPy function that applies to the entire Series)
    or a Python function that only works on series.

    Parameters
    ----------
    func : function
        Python function or NumPy ufunc to apply. Non-callable values
        (including strings) are rejected with ``TypeError``.

    batch_rows : int
        Specify expected number of elements in a batch, as well as the len of
        function input series. When the remaining data is insufficient, it may
        be less than this number.

    output_type : {'dataframe', 'series'}, default None
        Specify type of returned object. See `Notes` for more details.

    dtypes : Series, default None
        Specify dtypes of returned DataFrames. See `Notes` for more details.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. See `Notes` for more details.

    name : str, default None
        Specify name of returned Series. See `Notes` for more details.

    index : Index, default None
        Specify index of returned object. See `Notes` for more details.

    args : tuple
        Positional arguments passed to func after the series value.

    skip_infer : bool, default False
        Whether to skip inferring dtypes when dtypes or output_type is not
        specified.

    **kwds
        Additional keyword arguments passed to func. The keys
        ``output_types`` and ``object_type`` are consumed by this function and
        are not forwarded to ``func``.

    Returns
    -------
    Series or DataFrame
        If func returns a Series object the result will be a Series, else the
        result will be a DataFrame.

    Raises
    ------
    TypeError
        If ``func`` is not callable, or ``batch_rows`` is not an integer.
    ValueError
        If ``batch_rows`` is not greater than 0.

    See Also
    --------
    DataFrame.mf.apply_chunk: Apply function to DataFrame chunk.
    Series.apply: For non-batching operations.

    Notes
    -----
    When deciding output dtypes and shape of the return value, MaxFrame will
    try applying ``func`` onto a mock Series, and the apply call may fail.
    When this happens, you need to specify the type of apply call
    (DataFrame or Series) in output_type.

    * For DataFrame output, you need to specify a list or a pandas Series
      as ``dtypes`` of output DataFrame. ``index`` of output can also be
      specified.
    * For Series output, you need to specify ``dtype`` and ``name`` of
      output Series.
    * For any input with data type ``pandas.ArrowDtype(pyarrow.MapType)``, it will always
      be converted to a Python dict. And for any output with this data type, it must be
      returned as a Python dict as well.

    Examples
    --------
    Create a series with typical summer temperatures for each city.

    >>> import numpy as np
    >>> import pandas as pd
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s = md.Series([20, 21, 12],
    ...               index=['London', 'New York', 'Helsinki'])
    >>> s.execute()
    London      20
    New York    21
    Helsinki    12
    dtype: int64

    Square the values by defining a function and passing it as an
    argument to ``apply_chunk()``.

    >>> def square(x):
    ...     return x ** 2
    >>> s.mf.apply_chunk(square, batch_rows=2).execute()
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Square the values by passing an anonymous function as an
    argument to ``apply_chunk()``.

    >>> s.mf.apply_chunk(lambda x: x**2, batch_rows=2).execute()
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Define a custom function that needs additional positional
    arguments and pass these additional arguments using the
    ``args`` keyword.

    >>> def subtract_custom_value(x, custom_value):
    ...     return x - custom_value

    >>> s.mf.apply_chunk(subtract_custom_value, args=(5,), batch_rows=3).execute()
    London      15
    New York    16
    Helsinki     7
    dtype: int64

    Define a custom function that takes keyword arguments
    and pass these arguments to ``apply_chunk``.

    >>> def add_custom_values(x, **kwargs):
    ...     for month in kwargs:
    ...         x += kwargs[month]
    ...     return x

    >>> s.mf.apply_chunk(add_custom_values, batch_rows=2, june=30, july=20, august=25).execute()
    London      95
    New York    96
    Helsinki    87
    dtype: int64

    If func returns a dataframe, ``apply_chunk`` will return a dataframe as well.

    >>> def get_dataframe(x):
    ...     return pd.concat([x, x], axis=1)

    >>> s.mf.apply_chunk(get_dataframe, batch_rows=2).execute()
               0   1
    London    20  20
    New York  21  21
    Helsinki  12  12

    Provide ``dtypes``, or ``dtype`` with ``name``, to name the output schema.

    >>> s.mf.apply_chunk(
    ...     get_dataframe,
    ...     batch_rows=2,
    ...     dtypes={"A": np.int_, "B": np.int_},
    ...     output_type="dataframe"
    ... ).execute()
               A   B
    London    20  20
    New York  21  21
    Helsinki  12  12

    Create a series with a dict type.

    >>> import pyarrow as pa
    >>> from maxframe.lib.dtypes_extension import dict_
    >>> s = md.Series(
    ...     data=[[("k1", 1), ("k2", 2)], [("k1", 3)], None],
    ...     index=[1, 2, 3],
    ...     dtype=dict_(pa.string(), pa.int64()),
    ... )
    >>> s.execute()
    1    [('k1', 1), ('k2', 2)]
    2               [('k1', 3)]
    3                      <NA>
    dtype: map<string, int64>[pyarrow]

    Define a function that updates the map type with a new key-value pair in a batch.

    >>> def custom_set_item(row):
    ...     for _, value in row.items():
    ...         if value is not None:
    ...             value["x"] = 100
    ...     return row

    >>> s.mf.apply_chunk(
    ...     custom_set_item,
    ...     output_type="series",
    ...     dtype=s.dtype,
    ...     batch_rows=2,
    ...     skip_infer=True,
    ...     index=s.index,
    ... ).execute()
    1    [('k1', 1), ('k2', 2), ('x', 100)]
    2               [('k1', 3), ('x', 100)]
    3                                  <NA>
    dtype: map<string, int64>[pyarrow]
    """
    # Validate arguments up front so misuse fails before any operator is built.
    if not callable(func):
        raise TypeError("function must be a callable object")

    if batch_rows is not None:
        if not isinstance(batch_rows, int):
            raise TypeError("batch_rows must be an integer")
        if batch_rows <= 0:
            raise ValueError("batch_rows must be greater than 0")

    # ``output_types`` / ``object_type`` are alternative spellings of
    # ``output_type``; pop them so they are not passed on to ``func``.
    output_types = kwargs.pop("output_types", None)
    object_type = kwargs.pop("object_type", None)
    output_types = validate_output_types(
        output_type=output_type, output_types=output_types, object_type=object_type
    )
    output_type = output_types[0] if output_types else None
    if skip_infer and output_type is None:
        # without inference and without a hint, the result kind is left open
        output_type = OutputType.df_or_series

    # bind args and kwargs
    op = DataFrameApplyChunk(
        func=func,
        batch_rows=batch_rows,
        output_type=output_type,
        args=args,
        kwargs=kwargs,
    )

    if dtype is not None:
        dtype = make_dtype(dtype)
    return op(
        dataframe_or_series,
        dtypes=make_dtypes(dtypes),
        dtype=dtype,
        name=name,
        output_type=output_type,
        index=index,
        # Forward the flag exactly as df_apply_chunk does; previously it was
        # dropped here, so the operator call never received skip_infer.
        skip_infer=skip_infer,
    )
|