maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,923 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import inspect
|
|
17
|
+
from collections import OrderedDict, namedtuple
|
|
18
|
+
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
import msgpack
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from ...core import ENTITY_TYPE, enter_mode, is_build_mode, is_kernel_mode
|
|
25
|
+
from ...serialization.serializables import (
|
|
26
|
+
AnyField,
|
|
27
|
+
BoolField,
|
|
28
|
+
DataTypeField,
|
|
29
|
+
DictField,
|
|
30
|
+
Int32Field,
|
|
31
|
+
Serializable,
|
|
32
|
+
StringField,
|
|
33
|
+
)
|
|
34
|
+
from ...typing_ import TileableType
|
|
35
|
+
from ...utils import get_item_if_scalar, get_pd_option, pd_release_version, tokenize
|
|
36
|
+
from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
37
|
+
from ..utils import (
|
|
38
|
+
build_df,
|
|
39
|
+
build_empty_df,
|
|
40
|
+
build_empty_series,
|
|
41
|
+
build_series,
|
|
42
|
+
parse_index,
|
|
43
|
+
validate_axis,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# In pandas < 1.3, when aggregating with multiple levels and numeric_only is
# True, object columns are not ignored by the min / max functions.
_level_reduction_keep_object = pd_release_version[:2] < (1, 3)
# In pandas >= 1.3, when DataFrames are reduced into Series, a mixture of
# float and bool results in object dtype.
# NOTE(review): the flag is True for every pandas version except 1.2.x, which
# also covers releases older than 1.2 -- confirm that this is intended.
_reduce_bool_as_object = pd_release_version[:2] != (1, 2)

# True for pandas < 1.5; when set, _get_df_reduction_dtype calls
# idxmin / idxmax without passing numeric_only.
_idx_reduction_without_numeric_only = pd_release_version[:2] < (1, 5)

# Lightweight (column, aggfunc) pair; presumably mirrors pandas.NamedAgg --
# TODO confirm against callers.
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DataFrameReduction(DataFrameOperator):
    """Operator holding the options of a reduction.

    The serializable fields below store the reduction options;
    ``get_reduction_args`` converts the relevant ones into a keyword
    dictionary with unset (``None``) entries removed.
    """

    _legacy_name = "DataFrameReductionOperator"  # since v2.2.0

    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=True)
    level = AnyField("level", default=None)
    numeric_only = BoolField("numeric_only", default=None)
    bool_only = BoolField("bool_only", default=None)
    min_count = Int32Field("min_count", default=None)
    method = StringField("method", default=None)

    dtype = DataTypeField("dtype", default=None)
    combine_size = Int32Field("combine_size", default=None)
    use_inf_as_na = BoolField("use_inf_as_na", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # An explicitly supplied ``use_inf_as_na`` wins; otherwise use the
        # value of the ``mode.use_inf_as_na`` option (False when unavailable).
        fallback = get_pd_option("mode.use_inf_as_na", False)
        kw["use_inf_as_na"] = kw.pop("use_inf_as_na", fallback)
        super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)

    @property
    def is_atomic(self):
        return False

    def get_reduction_args(self, axis=None):
        """Return the keyword arguments describing this reduction.

        ``axis`` is only included when the first input has more than one
        dimension. Entries whose value is ``None`` are left out.
        """
        candidates = {"skipna": self.skipna}
        if self.inputs and self.inputs[0].ndim > 1:
            candidates["axis"] = axis
        candidates["numeric_only"] = self.numeric_only
        candidates["bool_only"] = self.bool_only
        return {
            name: value for name, value in candidates.items() if value is not None
        }


# Keep for import compatibility
DataFrameReductionOperator = DataFrameReduction
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class DataFrameCumReduction(DataFrameOperator):
    """Operator holding the options of a cumulative reduction."""

    _legacy_name = "DataFrameCumReductionOperator"  # since v2.2.0

    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=None)

    dtype = DataTypeField("dtype", default=None)
    use_inf_as_na = BoolField("use_inf_as_na", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # An explicitly supplied ``use_inf_as_na`` wins; otherwise use the
        # value of the ``mode.use_inf_as_na`` option (False when unavailable).
        fallback = get_pd_option("mode.use_inf_as_na", False)
        kw["use_inf_as_na"] = kw.pop("use_inf_as_na", fallback)
        super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)


# Keep for import compatibility
DataFrameCumReductionOperator = DataFrameCumReduction
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@functools.lru_cache(100)
def _get_series_reduction_dtype(
    dtype,
    func_name,
    axis=None,
    bool_only=False,
    skipna=True,
    numeric_only=False,
):
    """Infer the dtype produced by reducing a series of ``dtype``.

    A sample series is built with ``build_series`` and the reduction named
    ``func_name`` is run on it; the dtype of the result wrapped in a
    ``pd.Series`` is returned. Results are memoized by ``lru_cache``.
    """
    sample = build_series(dtype=dtype, ensure_string=True)

    if func_name == "size":
        # ``size`` is an attribute, not a method
        outcome = sample.size
    elif func_name == "str_concat":
        outcome = pd.Series([sample.str.cat()])
    else:
        # every remaining reduction is a method call; only the accepted
        # keyword set differs
        if func_name in ("count", "nunique"):
            call_kwargs = {}
        elif func_name in ("all", "any"):
            call_kwargs = {"axis": axis, "bool_only": bool_only}
        elif func_name in ("idxmin", "idxmax", "argmin", "argmax"):
            call_kwargs = {"axis": axis, "skipna": skipna}
        else:
            call_kwargs = {
                "axis": axis,
                "skipna": skipna,
                "numeric_only": numeric_only,
            }
        outcome = getattr(sample, func_name)(**call_kwargs)
    return pd.Series(outcome).dtype
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@functools.lru_cache(100)
def _get_df_reduction_dtype(
    dtype, func_name, axis=None, bool_only=False, skipna=False, numeric_only=False
):
    """Infer the dtype produced by reducing a one-column frame of ``dtype``.

    A sample frame is built from ``build_series(...).to_frame()`` and the
    reduction named ``func_name`` is run on it. Returns ``None`` when the
    reduced result is empty, otherwise its dtype. Results are memoized by
    ``lru_cache``.
    """
    frame = build_series(dtype=dtype, ensure_string=True).to_frame()

    if func_name == "str_concat":
        outcome = frame.apply(lambda col: col.str.cat(), axis=axis)
    else:
        # every remaining reduction is a method call; only the accepted
        # keyword set differs
        if func_name == "count":
            call_kwargs = {"axis": axis, "numeric_only": numeric_only}
        elif func_name == "nunique":
            call_kwargs = {"axis": axis}
        elif func_name in ("all", "any"):
            call_kwargs = {"axis": axis, "bool_only": bool_only}
        elif _idx_reduction_without_numeric_only and func_name in (
            "idxmin",
            "idxmax",
        ):
            call_kwargs = {"axis": axis, "skipna": skipna}
        else:
            call_kwargs = {
                "axis": axis,
                "skipna": skipna,
                "numeric_only": numeric_only,
            }
        outcome = getattr(frame, func_name)(**call_kwargs)

    if len(outcome) == 0:
        return None
    return outcome.dtype
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class ReductionCallable(Serializable):
    """Serializable callable that applies a named reduction to its argument.

    Calling an instance looks up the method ``func_name`` on the given value
    and invokes it with the stored ``kwargs``.
    """

    func_name = StringField("func_name")
    kwargs = DictField("kwargs", default=None)

    def __name__(self):
        # NOTE(review): this is a plain method, so ``obj.__name__`` evaluates
        # to a bound method rather than the string -- confirm whether a
        # property was intended. Left unchanged to keep the interface.
        return self.func_name

    def __call__(self, value):
        """Invoke the reduction ``func_name`` on ``value``.

        For one-dimensional values ``bool_only`` and ``numeric_only`` are
        removed from the keyword arguments before the call.
        """
        # ``kwargs`` defaults to None; treat that as "no keyword arguments"
        # instead of failing on ``None.copy()``. Copy so that the stored
        # dictionary is never mutated by the pops below.
        kw = dict(self.kwargs or {})
        if value.ndim == 1:
            kw.pop("bool_only", None)
            kw.pop("numeric_only", None)
        return getattr(value, self.func_name)(**kw)

    def __maxframe_tokenize__(self):
        # make sure compiled functions are correctly cached
        return type(self), self.func_name, self.kwargs
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
193
|
+
@classmethod
def get_reduction_callable(cls, op):
    """Build a ``ReductionCallable`` from the options stored on ``op``.

    Only the options among ``skipna``, ``numeric_only`` and ``bool_only``
    whose value is not ``None`` are forwarded as keyword arguments.
    """
    reduction_name = op._func_name
    candidates = (
        ("skipna", op.skipna),
        ("numeric_only", op.numeric_only),
        ("bool_only", op.bool_only),
    )
    call_kwargs = {name: value for name, value in candidates if value is not None}
    return ReductionCallable(func_name=reduction_name, kwargs=call_kwargs)
|
|
201
|
+
|
|
202
|
+
def _call_groupby_level(self, df, level):
|
|
203
|
+
return df.groupby(level=level).agg(
|
|
204
|
+
self.get_reduction_callable(self), method=self.method
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
def _call_dataframe(self, df):
|
|
208
|
+
axis = getattr(self, "axis", None) or 0
|
|
209
|
+
level = getattr(self, "level", None)
|
|
210
|
+
skipna = getattr(self, "skipna", True)
|
|
211
|
+
numeric_only = getattr(self, "numeric_only", None)
|
|
212
|
+
bool_only = getattr(self, "bool_only", None)
|
|
213
|
+
self.axis = axis = validate_axis(axis, df)
|
|
214
|
+
func_name = getattr(self, "_func_name")
|
|
215
|
+
|
|
216
|
+
if level is not None and axis == 1:
|
|
217
|
+
raise NotImplementedError("Not support specify level for axis==1")
|
|
218
|
+
|
|
219
|
+
if func_name == "size":
|
|
220
|
+
reduced = pd.Series(
|
|
221
|
+
np.zeros(df.shape[1 - axis]),
|
|
222
|
+
index=df.dtypes.index if axis == 0 else None,
|
|
223
|
+
)
|
|
224
|
+
reduced_cols = list(reduced.index)
|
|
225
|
+
reduced_dtype = reduced.dtype
|
|
226
|
+
elif func_name == "custom_reduction":
|
|
227
|
+
empty_df = build_df(df, ensure_string=True)
|
|
228
|
+
reduced = getattr(self, "custom_reduction").__call_agg__(empty_df)
|
|
229
|
+
reduced_cols = list(reduced.index)
|
|
230
|
+
reduced_dtype = reduced.dtype
|
|
231
|
+
else:
|
|
232
|
+
reduced_cols, dtypes = [], []
|
|
233
|
+
for col, src_dt in df.dtypes.items():
|
|
234
|
+
dt = _get_df_reduction_dtype(
|
|
235
|
+
src_dt,
|
|
236
|
+
func_name,
|
|
237
|
+
axis=axis,
|
|
238
|
+
bool_only=bool_only,
|
|
239
|
+
skipna=skipna,
|
|
240
|
+
numeric_only=numeric_only,
|
|
241
|
+
)
|
|
242
|
+
if dt is not None:
|
|
243
|
+
reduced_cols.append(col)
|
|
244
|
+
dtypes.append(dt)
|
|
245
|
+
elif (
|
|
246
|
+
_level_reduction_keep_object
|
|
247
|
+
and numeric_only
|
|
248
|
+
and level is not None
|
|
249
|
+
and func_name in ("min", "max")
|
|
250
|
+
and src_dt == np.dtype(object)
|
|
251
|
+
): # pragma: no cover
|
|
252
|
+
reduced_cols.append(col)
|
|
253
|
+
dtypes.append(np.dtype(object))
|
|
254
|
+
if len(dtypes) == 0:
|
|
255
|
+
reduced_dtype = np.dtype("O")
|
|
256
|
+
elif all(dt == dtypes[0] for dt in dtypes):
|
|
257
|
+
reduced_dtype = dtypes[0]
|
|
258
|
+
else:
|
|
259
|
+
# as we already bypassed dtypes with same values,
|
|
260
|
+
# when has_mixed_bool is True, there are other dtypes
|
|
261
|
+
# other than bool.
|
|
262
|
+
has_mixed_bool = any(dt == np.dtype(bool) for dt in dtypes)
|
|
263
|
+
if _reduce_bool_as_object and has_mixed_bool:
|
|
264
|
+
reduced_dtype = np.dtype("O")
|
|
265
|
+
elif not all(isinstance(dt, np.dtype) for dt in dtypes):
|
|
266
|
+
# todo currently we return mixed dtypes as np.dtype('O').
|
|
267
|
+
# handle pandas Dtypes in the future more carefully.
|
|
268
|
+
reduced_dtype = np.dtype("O")
|
|
269
|
+
else:
|
|
270
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
271
|
+
|
|
272
|
+
reduced_dtype = find_common_type(dtypes)
|
|
273
|
+
|
|
274
|
+
if level is not None:
|
|
275
|
+
return self._call_groupby_level(df[reduced_cols], level)
|
|
276
|
+
|
|
277
|
+
if axis == 0:
|
|
278
|
+
reduced_shape = (len(reduced_cols),)
|
|
279
|
+
reduced_index_value = parse_index(pd.Index(reduced_cols), store_data=True)
|
|
280
|
+
else:
|
|
281
|
+
reduced_shape = (df.shape[0],)
|
|
282
|
+
reduced_index_value = parse_index(pd.RangeIndex(-1))
|
|
283
|
+
|
|
284
|
+
return self.new_series(
|
|
285
|
+
[df],
|
|
286
|
+
shape=reduced_shape,
|
|
287
|
+
dtype=reduced_dtype,
|
|
288
|
+
index_value=reduced_index_value,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
def _call_series(self, series):
|
|
292
|
+
level = getattr(self, "level", None)
|
|
293
|
+
axis = getattr(self, "axis", None)
|
|
294
|
+
skipna = getattr(self, "skipna", True)
|
|
295
|
+
numeric_only = getattr(self, "numeric_only", None)
|
|
296
|
+
bool_only = getattr(self, "bool_only", None)
|
|
297
|
+
self.axis = axis = validate_axis(axis or 0, series)
|
|
298
|
+
func_name = getattr(self, "_func_name")
|
|
299
|
+
|
|
300
|
+
if level is not None:
|
|
301
|
+
return self._call_groupby_level(series, level)
|
|
302
|
+
|
|
303
|
+
if func_name == "custom_reduction":
|
|
304
|
+
empty_series = build_series(series, ensure_string=True)
|
|
305
|
+
custom_reduction_obj = getattr(self, "custom_reduction")
|
|
306
|
+
result_dtype = getattr(custom_reduction_obj, "result_dtype", None)
|
|
307
|
+
if result_dtype is None:
|
|
308
|
+
result_scalar = custom_reduction_obj.__call_agg__(empty_series)
|
|
309
|
+
if hasattr(result_scalar, "to_pandas"): # pragma: no cover
|
|
310
|
+
result_scalar = result_scalar.to_pandas()
|
|
311
|
+
result_dtype = pd.Series(result_scalar).dtype
|
|
312
|
+
else:
|
|
313
|
+
result_dtype = _get_series_reduction_dtype(
|
|
314
|
+
series.dtype,
|
|
315
|
+
func_name,
|
|
316
|
+
axis=axis,
|
|
317
|
+
bool_only=bool_only,
|
|
318
|
+
numeric_only=numeric_only,
|
|
319
|
+
skipna=skipna,
|
|
320
|
+
)
|
|
321
|
+
return self.new_scalar([series], dtype=result_dtype)
|
|
322
|
+
|
|
323
|
+
def __call__(self, a):
|
|
324
|
+
if is_kernel_mode() and not getattr(self, "is_atomic", False):
|
|
325
|
+
return self.get_reduction_callable(self)(a)
|
|
326
|
+
|
|
327
|
+
if isinstance(a, DATAFRAME_TYPE):
|
|
328
|
+
return self._call_dataframe(a)
|
|
329
|
+
else:
|
|
330
|
+
return self._call_series(a)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class DataFrameCumReductionMixin(DataFrameOperatorMixin):
    """Mixin implementing the call logic of cumulative reductions.

    Results keep the shape and index of the input.
    """

    def _call_dataframe(self, df):
        """Create the output DataFrame, inferring dtypes on an empty mock frame."""
        self.axis = axis = validate_axis(getattr(self, "axis", None) or 0, df)

        mock_df = build_empty_df(df.dtypes)
        method_name = getattr(self, "_func_name")
        reduced_df = getattr(mock_df, method_name)(axis=axis)
        return self.new_dataframe(
            [df],
            shape=df.shape,
            dtypes=reduced_df.dtypes,
            index_value=df.index_value,
            columns_value=df.columns_value,
        )

    def _call_series(self, series):
        """Create the output Series with the same shape, dtype, name and index."""
        axis = getattr(self, "axis", None) or 0
        # normalize the string alias to its integer form
        self.axis = 0 if axis == "index" else axis

        return self.new_series(
            [series],
            shape=series.shape,
            dtype=series.dtype,
            name=series.name,
            index_value=series.index_value,
        )

    def __call__(self, a):
        """Dispatch to the DataFrame or Series implementation based on ``a``."""
        handler = (
            self._call_dataframe if isinstance(a, DATAFRAME_TYPE) else self._call_series
        )
        return handler(a)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
class CustomReduction:
    """Base class for user-defined reductions built from pre / agg / post stages.

    Subclasses implement ``agg`` and may override ``pre`` and ``post``.
    """

    name: Optional[str]
    output_limit: Optional[int]
    kwds: Dict

    # set to True when pre() already performs aggregation
    pre_with_agg = False

    def __init__(self, name=None, is_gpu=None):
        self.name = name if name else "<custom>"
        self.output_limit = 1
        self._is_gpu = is_gpu

    @property
    def __name__(self):
        return self.name

    @property
    def result_dtype(self):
        # no declared dtype in the base class
        return None

    def __call__(self, value):
        """Reduce ``value``: entities build a deferred result, others run eagerly."""
        if not isinstance(value, ENTITY_TYPE):
            return self.__call_agg__(value)

        from .custom_reduction import build_custom_reduction_result

        return build_custom_reduction_result(value, self)

    def __call_agg__(self, value):
        """Run pre, agg (unless ``pre_with_agg`` is set) and post on ``value``."""
        staged = self.pre(value)
        staged = staged if isinstance(staged, tuple) else (staged,)
        # update output limit into actual size
        self.output_limit = len(staged)

        # only perform aggregation when pre() does not perform aggregation
        if not self.pre_with_agg:
            staged = self.agg(*staged)
            staged = staged if isinstance(staged, tuple) else (staged,)

        return self.post(*staged)

    def is_gpu(self):
        """Return the GPU flag given at construction, or False in build mode."""
        if is_build_mode():
            return False
        return self._is_gpu

    def pre(self, value):  # noqa: R0201  # pylint: disable=no-self-use
        """Pre-process the input; the default passes it through as a 1-tuple."""
        return (value,)

    def agg(self, *values):  # noqa: R0201  # pylint: disable=no-self-use
        """Aggregate the outputs of ``pre``; must be provided by subclasses."""
        raise NotImplementedError

    def post(self, *value):  # noqa: R0201  # pylint: disable=no-self-use
        """Finalize the result; the default unwraps a single aggregated value."""
        assert len(value) == 1
        return value[0]

    def __maxframe_tokenize__(self):
        import cloudpickle

        return cloudpickle.dumps(self)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class BuiltinReduction(Serializable, CustomReduction):
    """Serializable variant of ``CustomReduction`` whose state is stored in fields."""

    name: Optional[str] = StringField("name", default=None)
    output_limit: Optional[int] = Int32Field("output_limit", default=1)
    kwds: Dict = DictField("kwds", default_factory=dict)
    # stored under the serialized field name "is_gpu"
    _is_gpu: bool = BoolField("is_gpu", default=False)

    def __init__(self, name=None, is_gpu=None, **kw):
        # ``output_limit`` defaults to 1 when the caller does not supply it;
        # only Serializable.__init__ is invoked here, CustomReduction.__init__
        # is not called.
        output_limit = kw.pop("output_limit", 1)
        Serializable.__init__(
            self, name=name, output_limit=output_limit, _is_gpu=is_gpu, **kw
        )

    def __maxframe_tokenize__(self):
        # token is built from the field values instead of a pickle of the object
        return type(self), self.name, self.kwds, self._is_gpu
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
class ReductionPreStep(NamedTuple):
    """Description of a function applied to raw input before aggregation."""

    # key of the independent (source) tileable the step reads from
    input_key: str
    # key of the tileable that is fed into the aggregation
    output_key: str
    # columns the step applies to; None when no column restriction is recorded
    columns: Optional[List[str]]
    # msgpack-serialized function IDL
    func_idl: bytes
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class ReductionAggStep(NamedTuple):
    """Description of a single aggregation found in a compiled function."""

    # key of the tileable being aggregated
    input_key: str
    # name of the function the user registered
    raw_func_name: Optional[str]
    # ``_func_name`` of the aggregation operator
    step_func_name: Optional[str]
    # function names for the map and combine phases; they are equal except for
    # "count" / "size", which are combined with "sum"
    map_func_name: Optional[str]
    agg_func_name: Optional[str]
    # custom reduction object when the step originates from one, else None
    custom_reduction: Optional[CustomReduction]
    # key of the aggregation result
    output_key: str
    output_limit: int
    # keyword arguments of the reduction operator
    kwds: Dict[str, Any]
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
class ReductionPostStep(NamedTuple):
    """Description of a function applied to aggregation results."""

    # keys of the aggregation outputs consumed by the step
    input_keys: List[str]
    # key of the final result tileable
    output_key: str
    # name of the function as reported to the user
    func_name: str
    # columns the step applies to; None when no column restriction is recorded
    columns: Optional[List[str]]
    # msgpack-serialized function IDL
    func_idl: bytes
    # per-column function names aligned with ``columns``, when available
    post_func_aliases: Optional[List[str]] = None
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
class ReductionSteps(NamedTuple):
    """Compiled reduction split into pre-aggregation, aggregation and post steps."""

    pre_funcs: List[ReductionPreStep]
    agg_funcs: List[ReductionAggStep]
    post_funcs: List[ReductionPostStep]
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
# lookup table for numpy arithmetic operators in pandas:
# maps numpy-style operator names to the corresponding pandas method names
_func_name_converts = dict(
    greater="gt",
    greater_equal="ge",
    less="lt",
    less_equal="le",
    equal="eq",
    not_equal="ne",
    true_divide="truediv",
    floor_divide="floordiv",
    power="pow",
    subtract="sub",
    multiply="mul",
)
# module-level cache of compiled functions, keyed by the function token
_func_compile_cache = dict()  # type: Dict[str, ReductionSteps]

# value types that may be embedded literally into a function IDL
_idl_primitive_types = (
    type(None),
    int,
    float,
    bool,
    str,
    bytes,
    np.integer,
    np.bool_,
)

# operation tags used in generated function IDL statements
IN_VAR_IDL_OP = "in_var"
OUT_VAR_IDL_OP = "out_var"
MASK_VAR_IDL_OP = "mask"
WHERE_VAR_IDL_OP = "where"
LET_VAR_OP = "let"
# prefixes prepended to the function name of unary / binary operators
UNARY_IDL_OP_PREFIX = "unary:"
BINARY_IDL_OP_PREFIX = "bin:"
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
class ReductionCompiler:
|
|
519
|
+
def __init__(self, axis=0):
|
|
520
|
+
self._axis = axis
|
|
521
|
+
|
|
522
|
+
self._key_to_tileable = dict()
|
|
523
|
+
self._output_tileables = []
|
|
524
|
+
self._lambda_counter = 0
|
|
525
|
+
self._custom_counter = 0
|
|
526
|
+
self._func_cache = dict()
|
|
527
|
+
|
|
528
|
+
self._compiled_funcs = []
|
|
529
|
+
self._output_key_to_pre_steps = dict()
|
|
530
|
+
self._output_key_to_pre_cols = dict()
|
|
531
|
+
self._output_key_to_agg_steps = dict()
|
|
532
|
+
self._output_key_to_post_steps = dict()
|
|
533
|
+
self._output_key_to_post_cols = dict()
|
|
534
|
+
self._output_key_to_col_func_mapping = dict()
|
|
535
|
+
|
|
536
|
+
@classmethod
|
|
537
|
+
def _check_function_valid(cls, func):
|
|
538
|
+
if isinstance(func, functools.partial):
|
|
539
|
+
return cls._check_function_valid(func.func)
|
|
540
|
+
elif not hasattr(func, "__code__"):
|
|
541
|
+
return
|
|
542
|
+
|
|
543
|
+
func_code = func.__code__
|
|
544
|
+
func_vars = {n: func.__globals__.get(n) for n in func_code.co_names}
|
|
545
|
+
if func.__closure__:
|
|
546
|
+
func_vars.update(
|
|
547
|
+
{
|
|
548
|
+
n: cell.cell_contents
|
|
549
|
+
for n, cell in zip(func_code.co_freevars, func.__closure__)
|
|
550
|
+
}
|
|
551
|
+
)
|
|
552
|
+
# external MaxFrame objects shall not be referenced
|
|
553
|
+
for var_name, val in func_vars.items():
|
|
554
|
+
if isinstance(val, ENTITY_TYPE):
|
|
555
|
+
raise ValueError(
|
|
556
|
+
f"Variable {var_name} used by {func.__name__} "
|
|
557
|
+
"cannot be a MaxFrame object"
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
@staticmethod
|
|
561
|
+
def _update_col_dict(col_dict: Dict, key: str, cols: List):
|
|
562
|
+
if key in col_dict:
|
|
563
|
+
existing_cols = col_dict[key]
|
|
564
|
+
if existing_cols is not None:
|
|
565
|
+
existing_col_set = set(existing_cols)
|
|
566
|
+
col_dict[key].extend([c for c in cols if c not in existing_col_set])
|
|
567
|
+
else:
|
|
568
|
+
col_dict[key] = list(cols) if cols is not None else None
|
|
569
|
+
|
|
570
|
+
def add_function(self, func, ndim, cols=None, func_name=None):
    """Compile ``func`` and register its steps with this compiler.

    Args:
        func: reduction function to register.
        ndim: number of dimensions of the object the function is applied to.
        cols: columns the function applies to; ignored unless the compiler
            axis is 0.
        func_name: explicit name; defaults to ``func.__name__``.

    Raises:
        ValueError: if ``func`` is a built-in that has no registered
            aggregation counterpart.
    """
    from .aggregation import _agg_functions

    if cols is None or self._axis != 0:
        cols = None

    func_name = func_name or getattr(func, "__name__", None)
    if func_name is None or func_name == "<lambda>":
        # anonymous functions get a numbered placeholder name
        func_name = f"<lambda_{self._lambda_counter}>"
        self._lambda_counter += 1
    if func_name == "<custom>":
        func_name = f"<custom_{self._custom_counter}>"
        self._custom_counter += 1

    if inspect.isbuiltin(func):
        raw_func_name = getattr(func, "__name__", "N/A")
        if raw_func_name not in _agg_functions:
            raise ValueError(f"Unexpected built-in function {raw_func_name}")
        func = _agg_functions[raw_func_name]

    compile_result = self._compile_function(func, func_name, ndim=ndim)
    self._compiled_funcs.append(compile_result)

    for pre_step in compile_result.pre_funcs:
        out_key = pre_step.output_key
        self._output_key_to_pre_steps[out_key] = pre_step
        self._update_col_dict(self._output_key_to_pre_cols, out_key, cols)

    for agg_step in compile_result.agg_funcs:
        self._output_key_to_agg_steps[agg_step.output_key] = agg_step

    for step in compile_result.post_funcs:
        self._output_key_to_post_steps[step.output_key] = step
        self._update_col_dict(self._output_key_to_post_cols, step.output_key, cols)

        if cols is not None:
            # remember which function name produced each column
            col_name_map = (
                self._output_key_to_col_func_mapping.get(step.output_key) or {}
            )
            col_name_map.update((col, func_name) for col in cols)
            self._output_key_to_col_func_mapping[step.output_key] = col_name_map
|
|
611
|
+
|
|
612
|
+
@staticmethod
def _build_mock_return_object(func, input_dtype, ndim):
    """Call ``func`` on an empty mock object and return whatever it returns.

    A mock series is used when ``ndim`` is 1, otherwise a mock DataFrame with
    two columns ``"A"`` and ``"B"``; all mock data has dtype ``input_dtype``.
    """
    from ..initializer import DataFrame as MaxDataFrame
    from ..initializer import Series as MaxSeries

    mock_dtype = np.dtype(input_dtype)
    if ndim == 1:
        mock_obj = MaxSeries(build_empty_series(mock_dtype))
    else:
        column_dtypes = pd.Series([mock_dtype] * 2, index=["A", "B"])
        mock_obj = MaxDataFrame(build_empty_df(column_dtypes))

    # calc target tileable to generate DAG
    with enter_mode(kernel=True, build=False):
        return func(mock_obj)
|
|
629
|
+
|
|
630
|
+
@enter_mode(build=True)
def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
    """Compile ``func`` into pre-aggregation, aggregation and post steps.

    The function is called on a mock object to obtain a DAG; operators marked
    atomic are treated as aggregations. Results are cached in the module-level
    ``_func_compile_cache`` keyed by a token of the arguments.

    Raises:
        ValueError: if the function does not return a MaxFrame object, does
            not reduce the number of dimensions, or uses unsupported operators.
    """
    from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
    from ...tensor.misc import TensorWhere
    from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
    from ..datasource.dataframe import DataFrameDataSource
    from ..datasource.series import SeriesDataSource
    from ..indexing.where import DataFrameWhere
    from .custom_reduction import DataFrameCustomReduction

    func_token = tokenize(func, self._axis, func_name, ndim)
    if func_token in _func_compile_cache:
        return _func_compile_cache[func_token]
    custom_reduction = func if isinstance(func, CustomReduction) else None

    self._check_function_valid(func)

    try:
        func_ret = self._build_mock_return_object(func, float, ndim=ndim)
    except (TypeError, AttributeError):
        # we may encounter lambda x: x.str.cat(...), use an object series to test
        func_ret = self._build_mock_return_object(func, object, ndim=1)
    output_limit = getattr(func, "output_limit", None) or 1

    if not isinstance(func_ret, ENTITY_TYPE):
        raise ValueError(
            f"Custom function should return a MaxFrame object, not {type(func_ret)}"
        )
    if func_ret.ndim >= ndim:
        raise ValueError("Function not a reduction")

    agg_graph = func_ret.build_graph()
    # tileables produced by atomic operators are the aggregations themselves
    agg_tileables = set(t for t in agg_graph if getattr(t.op, "is_atomic", False))
    # check operators before aggregation
    for t in agg_graph.dfs(
        list(agg_tileables), visit_predicate="all", reverse=True
    ):
        if t not in agg_tileables and not isinstance(
            t.op,
            (
                DataFrameUnaryOp,
                DataFrameBinOp,
                TensorUnaryOp,
                TensorBinOp,
                TensorWhere,
                DataFrameWhere,
                DataFrameDataSource,
                SeriesDataSource,
            ),
        ):
            raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")
    # check operators after aggregation
    for t in agg_graph.dfs(list(agg_tileables), visit_predicate="all"):
        if t not in agg_tileables and not isinstance(
            t.op,
            (
                DataFrameUnaryOp,
                DataFrameBinOp,
                TensorWhere,
                DataFrameWhere,
                TensorUnaryOp,
                TensorBinOp,
            ),
        ):
            raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")

    pre_funcs, agg_funcs, post_funcs = [], [], []
    # keys of aggregation inputs for which a pre step was already emitted
    visited_inputs = set()
    # collect aggregations and their inputs
    for t in agg_tileables:
        agg_input_key = t.inputs[0].key

        # collect agg names
        step_func_name = getattr(t.op, "_func_name")
        if step_func_name in ("count", "size"):
            # partial counts / sizes are combined by summation
            map_func_name, agg_func_name = step_func_name, "sum"
        else:
            map_func_name, agg_func_name = step_func_name, step_func_name

        if isinstance(t.op, DataFrameCustomReduction):
            custom_reduction = custom_reduction or t.op.custom_reduction

        # build agg description
        agg_funcs.append(
            ReductionAggStep(
                agg_input_key,
                func_name,
                step_func_name,
                map_func_name,
                agg_func_name,
                custom_reduction,
                t.key,
                output_limit,
                t.op.get_reduction_args(axis=self._axis),
            )
        )
        # collect agg input and build function
        if agg_input_key not in visited_inputs:
            visited_inputs.add(agg_input_key)
            initial_inputs = list(t.inputs[0].build_graph().iter_indep())
            assert len(initial_inputs) == 1
            input_key = initial_inputs[0].key

            func_idl, _ = self._generate_function_idl(t.inputs[0])
            pre_funcs.append(
                ReductionPreStep(
                    input_key, agg_input_key, None, msgpack.dumps(func_idl)
                )
            )
    # collect function output after agg
    func_idl, input_keys = self._generate_function_idl(func_ret)
    post_funcs.append(
        ReductionPostStep(
            input_keys, func_ret.key, func_name, None, msgpack.dumps(func_idl)
        )
    )
    # bound the cache size by dropping the first stored entry
    if len(_func_compile_cache) > 100:  # pragma: no cover
        _func_compile_cache.pop(next(iter(_func_compile_cache.keys())))
    result = _func_compile_cache[func_token] = ReductionSteps(
        pre_funcs, agg_funcs, post_funcs
    )
    return result
|
|
752
|
+
|
|
753
|
+
def _generate_function_idl(self, out_tileable: TileableType) -> Tuple[List, List]:
    """
    Generate function IDL from tileable DAG

    IDL Format: [
        ["in_var", "input_var_name"],
        ["op", "op_output_var", ["op_arg1", "op_arg2"], {"op_key1": "op_key2"}],
        ["out_var", "output_var_name"],
    ]

    Returns a tuple of the IDL statement list and the list of keys of the
    input tileables, in the order their input variables were created.
    """
    from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
    from ...tensor.datasource import Scalar
    from ...tensor.misc import TensorWhere
    from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
    from ..datasource.dataframe import DataFrameDataSource
    from ..datasource.series import SeriesDataSource
    from ..indexing.where import DataFrameWhere

    # input tileable key -> input variable name, in creation order
    input_key_to_var = OrderedDict()
    # every visited tileable key -> variable name (inputs included)
    local_key_to_var = dict()
    idl_lines = []

    # operators whose outputs are treated as inputs of the generated function
    input_op_types = (
        DataFrameDataSource,
        SeriesDataSource,
        DataFrameReduction,
    )

    def _gen_expr_str(t):
        # generate code for t
        if t.key in local_key_to_var:
            return

        if isinstance(t.op, input_op_types):
            # tileable is an input arg, build a function variable
            if t.key not in input_key_to_var:  # pragma: no branch
                input_key_to_var[t.key] = local_key_to_var[
                    t.key
                ] = f"invar{len(input_key_to_var)}"
        else:
            # emit statements for all inputs first so their variables exist
            for inp in t.inputs:
                _gen_expr_str(inp)

            var_name = local_key_to_var[t.key] = f"var{len(local_key_to_var)}"
            keys_to_vars = {inp.key: local_key_to_var[inp.key] for inp in t.inputs}

            def _interpret_var(v):
                v = get_item_if_scalar(v)
                # get representation for variables
                if hasattr(v, "key"):
                    return keys_to_vars[v.key]
                elif isinstance(v, _idl_primitive_types):
                    return v
                else:
                    raise NotImplementedError(
                        f"Type {type(v)} currently not interpretable"
                    )

            func_name = getattr(t.op, "_func_name", None)
            if func_name is None:
                func_name = getattr(t.op, "_bit_func_name", None)
            # handle function name differences between numpy and pandas arithmetic ops
            if func_name in _func_name_converts:
                func_name = _func_name_converts[func_name]

            # build given different op types
            if isinstance(t.op, (DataFrameUnaryOp, TensorUnaryOp)):
                val = _interpret_var(t.inputs[0])
                statements = [
                    [UNARY_IDL_OP_PREFIX + func_name, var_name, [val], {}]
                ]
            elif isinstance(t.op, (DataFrameBinOp, TensorBinOp)):
                lhs, rhs = t.op.lhs, t.op.rhs
                # an explicit axis is only passed when operands differ in ndim
                op_axis = (
                    1 - self._axis
                    if hasattr(lhs, "ndim")
                    and hasattr(rhs, "ndim")
                    and lhs.ndim != rhs.ndim
                    else None
                )
                lhs = _interpret_var(lhs)
                rhs = _interpret_var(rhs)
                axis_arg = {"axis": op_axis} if op_axis is not None else {}
                # NOTE(review): this statement has five elements (an empty dict
                # followed by axis_arg) while the other operators emit four;
                # confirm against the IDL consumer.
                statements = [
                    [
                        BINARY_IDL_OP_PREFIX + func_name,
                        var_name,
                        [lhs, rhs],
                        {},
                        axis_arg,
                    ]
                ]
            elif isinstance(t.op, TensorWhere):
                cond = _interpret_var(t.op.condition)
                x = _interpret_var(t.op.x)
                y = _interpret_var(t.op.y)
                statements = [[WHERE_VAR_IDL_OP, var_name, [cond, x, y], {}]]
            elif isinstance(t.op, DataFrameWhere):
                func_name = (
                    MASK_VAR_IDL_OP if t.op.replace_true else WHERE_VAR_IDL_OP
                )
                inp = _interpret_var(t.op.input)
                cond = _interpret_var(t.op.cond)
                other = _interpret_var(t.op.other)
                statements = [
                    [
                        func_name,
                        var_name,
                        [cond, inp, other],
                        {"axis": t.op.axis, "level": t.op.level},
                    ]
                ]
            elif isinstance(t.op, Scalar):
                # for scalar inputs of other operators
                data = _interpret_var(t.op.data)
                statements = [[LET_VAR_OP, var_name, [data]]]
            else:  # pragma: no cover
                raise NotImplementedError(
                    f"Does not support aggregating on {type(t.op)}"
                )

            idl_lines.extend(statements)

    _gen_expr_str(out_tileable)

    input_idls = [
        [IN_VAR_IDL_OP, var_name] for var_name in input_key_to_var.values()
    ]
    output_idls = [[OUT_VAR_IDL_OP, local_key_to_var[out_tileable.key]]]
    return input_idls + idl_lines + output_idls, list(input_key_to_var.keys())
|
|
883
|
+
|
|
884
|
+
def compile(self) -> ReductionSteps:
    """Assemble all registered functions into a single ``ReductionSteps``.

    Pre and post steps are rebuilt with the columns accumulated while
    functions were added. Post steps additionally carry per-column function
    names when a column mapping was recorded. When exactly one anonymous
    (or one custom) function was registered, its numbered placeholder name is
    reported as ``"<lambda>"`` (or ``"<custom>"``).
    """
    pre_funcs = [
        ReductionPreStep(
            step.input_key,
            step.output_key,
            self._output_key_to_pre_cols[key],
            step.func_idl,
        )
        for key, step in self._output_key_to_pre_steps.items()
    ]

    agg_funcs = list(self._output_key_to_agg_steps.values())

    post_funcs = []
    for key, step in self._output_key_to_post_steps.items():
        post_cols = self._output_key_to_post_cols[key]
        func_renames = None
        if post_cols:
            col_map = self._output_key_to_col_func_mapping.get(key)
            if col_map:
                func_renames = [col_map[c] for c in post_cols]

        func_name = step.func_name
        # a sole lambda / custom function keeps its un-numbered name
        if self._lambda_counter == 1 and step.func_name == "<lambda_0>":
            func_name = "<lambda>"
        if self._custom_counter == 1 and step.func_name == "<custom_0>":
            func_name = "<custom>"

        post_funcs.append(
            ReductionPostStep(
                step.input_keys,
                step.output_key,
                func_name,
                post_cols,
                step.func_idl,
                func_renames,
            )
        )

    return ReductionSteps(pre_funcs, agg_funcs, post_funcs)
|