maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,1728 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import dataclasses
|
|
16
|
+
import functools
|
|
17
|
+
import inspect
|
|
18
|
+
import itertools
|
|
19
|
+
import logging
|
|
20
|
+
import operator
|
|
21
|
+
import sys
|
|
22
|
+
from contextlib import contextmanager
|
|
23
|
+
from numbers import Integral
|
|
24
|
+
from typing import TYPE_CHECKING, Any, Callable, List, Optional
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
import pandas as pd
|
|
28
|
+
from pandas.core.dtypes.inference import is_dict_like, is_list_like
|
|
29
|
+
|
|
30
|
+
from ..config.validators import dtype_backend_validator
|
|
31
|
+
from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
|
|
32
|
+
from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
|
|
33
|
+
from ..lib.mmh3 import hash as mmh_hash
|
|
34
|
+
from ..udf import MarkedFunction
|
|
35
|
+
from ..utils import (
|
|
36
|
+
ModulePlaceholder,
|
|
37
|
+
is_full_slice,
|
|
38
|
+
lazy_import,
|
|
39
|
+
make_dtypes,
|
|
40
|
+
quiet_stdio,
|
|
41
|
+
sbytes,
|
|
42
|
+
tokenize,
|
|
43
|
+
validate_and_adjust_resource_ratio,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
if TYPE_CHECKING:
|
|
47
|
+
from .core import IndexValue
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
import pyarrow as pa
|
|
51
|
+
except ImportError: # pragma: no cover
|
|
52
|
+
pa = ModulePlaceholder("pyarrow")
|
|
53
|
+
|
|
54
|
+
if TYPE_CHECKING:
|
|
55
|
+
from .operators import DataFrameOperator
|
|
56
|
+
|
|
57
|
+
cudf = lazy_import("cudf", rename="cudf")
|
|
58
|
+
logger = logging.getLogger(__name__)
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
from ..lib.dtypes_extension import ArrowDtype
|
|
62
|
+
except ImportError:
|
|
63
|
+
ArrowDtype = None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def hash_index(index, size):
    """
    Split the labels of ``index`` into ``size`` hash buckets.

    Every label is converted with ``sbytes``, hashed with ``mmh_hash`` and
    reduced modulo ``size`` to choose its bucket.

    Parameters
    ----------
    index
        Index object providing ``map`` and ``groupby``.
    size
        Number of buckets to produce.

    Returns
    -------
    List of length ``size``. Entry ``i`` is the group of labels hashed to
    bucket ``i``, or an empty list when no label landed in that bucket.
    """

    def _bucket_of(label):
        return mmh_hash(sbytes(label)) % size

    bucket_ids = index.map(_bucket_of)
    labels_by_bucket = index.groupby(bucket_ids)
    return [labels_by_bucket.get(bucket, []) for bucket in range(size)]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def hash_dataframe_on(df, on, size, level=None):
    """
    Assign row positions of ``df`` to ``size`` buckets by hashing.

    Parameters
    ----------
    df
        DataFrame whose rows are distributed.
    on
        What to hash. ``None`` hashes the index (optionally restricted to
        ``level``), a callable is mapped over the index before hashing, a
        list may mix column labels and ``pd.Series`` objects, and anything
        else is used as a column selector on ``df``.
    size
        Number of buckets.
    level
        Index level(s) to hash; only consulted when ``on`` is ``None``.

    Returns
    -------
    List of length ``size``; entry ``i`` holds the positional row numbers
    whose hash modulo ``size`` equals ``i`` (an empty ``pd.Index`` if none).
    """
    hash_values = pd.util.hash_pandas_object

    if on is None:
        target = df.index if level is None else df.index.to_frame(False)[level]
        if cudf and isinstance(target, cudf.Index):  # pragma: no cover
            target = target.to_pandas()
        hashed = hash_values(target, categorize=False)
    elif callable(on):
        # TODO: could be optimized when ``on`` is a numpy ufunc or otherwise
        # vectorizable
        hashed = hash_values(df.index.map(on), categorize=False)
    else:
        if isinstance(on, list):
            # Series entries are taken as they are, labels are looked up in df
            pieces = [
                item if isinstance(item, pd.Series) else df[item] for item in on
            ]
            data = pd.concat(pieces, axis=1)
        else:
            data = df[on]
        hashed = hash_values(data, index=False, categorize=False)

    positions = pd.RangeIndex(0, len(hashed))
    positions_by_bucket = positions.groupby(hashed % size)
    return [positions_by_bucket.get(bucket, pd.Index([])) for bucket in range(size)]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def hash_dtypes(dtypes, size):
    """
    Split ``dtypes`` into ``size`` pieces by hashing its index labels.

    The index of ``dtypes`` is bucketed with ``hash_index`` and ``dtypes`` is
    then indexed with each bucket in turn.

    Returns
    -------
    List of length ``size`` with the selection of ``dtypes`` for each bucket.
    """
    pieces = []
    for bucket_labels in hash_index(dtypes.index, size):
        pieces.append(dtypes[bucket_labels])
    return pieces
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def sort_dataframe_inplace(df, *axis, **kw):
    """
    Sort ``df`` in place along each of the given axes, in order.

    Parameters
    ----------
    df
        Object to sort; it is mutated.
    axis
        Axes passed one by one to ``df.sort_index``.
    kw
        Extra keyword arguments forwarded to every ``sort_index`` call.

    Returns
    -------
    The same ``df`` object that was passed in.
    """
    for current_axis in axis:
        df.sort_index(inplace=True, axis=current_axis, **kw)
    return df
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@functools.lru_cache(1)
def _get_range_index_type():
    """
    Return the range-index type(s) to use in ``isinstance`` checks.

    Gives ``pd.RangeIndex`` alone when ``cudf`` is ``None``, otherwise a
    tuple of the pandas and cudf range-index classes. The result is cached.
    """
    if cudf is None:
        return pd.RangeIndex
    return pd.RangeIndex, cudf.RangeIndex
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@functools.lru_cache(1)
def _get_multi_index_type():
    """
    Return the multi-index type(s) to use in ``isinstance`` checks.

    Gives ``pd.MultiIndex`` alone when ``cudf`` is ``None``, otherwise a
    tuple of the pandas and cudf multi-index classes. The result is cached.
    """
    if cudf is None:
        return pd.MultiIndex
    return pd.MultiIndex, cudf.MultiIndex
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _get_range_index_start(pd_range_index):
|
|
130
|
+
try:
|
|
131
|
+
return pd_range_index.start
|
|
132
|
+
except AttributeError: # pragma: no cover
|
|
133
|
+
return pd_range_index._start
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _get_range_index_stop(pd_range_index):
|
|
137
|
+
try:
|
|
138
|
+
return pd_range_index.stop
|
|
139
|
+
except AttributeError: # pragma: no cover
|
|
140
|
+
return pd_range_index._stop
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _get_range_index_step(pd_range_index):
|
|
144
|
+
try:
|
|
145
|
+
return pd_range_index.step
|
|
146
|
+
except AttributeError: # pragma: no cover
|
|
147
|
+
pass
|
|
148
|
+
try: # pragma: no cover
|
|
149
|
+
return pd_range_index._step
|
|
150
|
+
except AttributeError: # pragma: no cover
|
|
151
|
+
return 1 # cudf does not support step arg
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def is_pd_range_empty(pd_range_index):
    """
    Tell whether a range index contains no elements.

    The decision uses only start, stop and step: with a non-negative step
    the range is empty when ``start >= stop``; with a negative step it is
    empty when ``start <= stop``.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)
    if step >= 0:
        return start >= stop
    return start <= stop
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def parse_index(index_value, *args, store_data=False, key=None):
|
|
164
|
+
from .core import IndexValue
|
|
165
|
+
|
|
166
|
+
def _extract_property(index, tp, ret_data):
|
|
167
|
+
kw = {
|
|
168
|
+
"_min_val": _get_index_min(index),
|
|
169
|
+
"_max_val": _get_index_max(index),
|
|
170
|
+
"_min_val_close": True,
|
|
171
|
+
"_max_val_close": True,
|
|
172
|
+
"_key": key or _tokenize_index(index, *args),
|
|
173
|
+
}
|
|
174
|
+
if ret_data:
|
|
175
|
+
kw["_data"] = index.values
|
|
176
|
+
for field in tp._FIELDS:
|
|
177
|
+
if field in kw or field == "_data":
|
|
178
|
+
continue
|
|
179
|
+
val = getattr(index, field.lstrip("_"), None)
|
|
180
|
+
if val is not None:
|
|
181
|
+
kw[field] = val
|
|
182
|
+
return kw
|
|
183
|
+
|
|
184
|
+
def _tokenize_index(index, *token_objects):
|
|
185
|
+
if not index.empty:
|
|
186
|
+
return tokenize(index)
|
|
187
|
+
else:
|
|
188
|
+
return tokenize(index, *token_objects)
|
|
189
|
+
|
|
190
|
+
def _get_index_min(index):
|
|
191
|
+
try:
|
|
192
|
+
return index.min()
|
|
193
|
+
except (ValueError, AttributeError):
|
|
194
|
+
if isinstance(index, pd.IntervalIndex):
|
|
195
|
+
return None
|
|
196
|
+
raise
|
|
197
|
+
except TypeError:
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
def _get_index_max(index):
|
|
201
|
+
try:
|
|
202
|
+
return index.max()
|
|
203
|
+
except (ValueError, AttributeError):
|
|
204
|
+
if isinstance(index, pd.IntervalIndex):
|
|
205
|
+
return None
|
|
206
|
+
raise
|
|
207
|
+
except TypeError:
|
|
208
|
+
return None
|
|
209
|
+
|
|
210
|
+
def _serialize_index(index):
|
|
211
|
+
tp = getattr(IndexValue, type(index).__name__)
|
|
212
|
+
properties = _extract_property(index, tp, store_data)
|
|
213
|
+
properties["_name"] = index.name
|
|
214
|
+
return tp(**properties)
|
|
215
|
+
|
|
216
|
+
def _serialize_range_index(index):
|
|
217
|
+
if is_pd_range_empty(index):
|
|
218
|
+
properties = {
|
|
219
|
+
"_is_monotonic_increasing": True,
|
|
220
|
+
"_is_monotonic_decreasing": False,
|
|
221
|
+
"_is_unique": True,
|
|
222
|
+
"_min_val": _get_index_min(index),
|
|
223
|
+
"_max_val": _get_index_max(index),
|
|
224
|
+
"_min_val_close": True,
|
|
225
|
+
"_max_val_close": False,
|
|
226
|
+
"_key": key or _tokenize_index(index, *args),
|
|
227
|
+
"_name": index.name,
|
|
228
|
+
"_dtype": index.dtype,
|
|
229
|
+
}
|
|
230
|
+
else:
|
|
231
|
+
properties = _extract_property(index, IndexValue.RangeIndex, False)
|
|
232
|
+
return IndexValue.RangeIndex(
|
|
233
|
+
_slice=slice(
|
|
234
|
+
_get_range_index_start(index),
|
|
235
|
+
_get_range_index_stop(index),
|
|
236
|
+
_get_range_index_step(index),
|
|
237
|
+
),
|
|
238
|
+
**properties,
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
def _serialize_multi_index(index):
    """Build an ``IndexValue.MultiIndex`` describing ``index``.

    On top of the extracted properties, the sort order and the dtype of
    every level are recorded.
    """
    fields = _extract_property(index, IndexValue.MultiIndex, store_data)
    fields["_sortorder"] = index.sortorder
    level_dtypes = [level.dtype for level in index.levels]
    fields["_dtypes"] = level_dtypes
    return IndexValue.MultiIndex(**fields)
|
|
246
|
+
|
|
247
|
+
if index_value is None:
|
|
248
|
+
return IndexValue(
|
|
249
|
+
_index_value=IndexValue.Index(
|
|
250
|
+
_is_monotonic_increasing=False,
|
|
251
|
+
_is_monotonic_decreasing=False,
|
|
252
|
+
_is_unique=False,
|
|
253
|
+
_min_val=None,
|
|
254
|
+
_max_val=None,
|
|
255
|
+
_min_val_close=True,
|
|
256
|
+
_max_val_close=True,
|
|
257
|
+
_key=key or tokenize(*args),
|
|
258
|
+
)
|
|
259
|
+
)
|
|
260
|
+
if hasattr(index_value, "to_pandas"): # pragma: no cover
|
|
261
|
+
# convert cudf.Index to pandas
|
|
262
|
+
index_value = index_value.to_pandas()
|
|
263
|
+
|
|
264
|
+
if isinstance(index_value, _get_range_index_type()):
|
|
265
|
+
return IndexValue(_index_value=_serialize_range_index(index_value))
|
|
266
|
+
elif isinstance(index_value, _get_multi_index_type()):
|
|
267
|
+
return IndexValue(_index_value=_serialize_multi_index(index_value))
|
|
268
|
+
else:
|
|
269
|
+
return IndexValue(_index_value=_serialize_index(index_value))
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def gen_unknown_index_value(index_value, *args, normalize_range_index=False):
    """
    Create an index value of the same kind as ``index_value`` that carries no data.

    Parameters
    ----------
    index_value
        Index value used as the template (kind, dtype and name(s)).
    args
        Extra arguments forwarded to ``parse_index``.
    normalize_range_index
        When true, a range index is not kept as a range index but handled
        like any other non-multi index.

    Returns
    -------
    Index value produced by ``parse_index`` from an empty placeholder index.
    """
    pd_index = index_value.to_pandas()

    if isinstance(pd_index, pd.RangeIndex) and not normalize_range_index:
        placeholder = pd.RangeIndex(-1, name=pd_index.name)
        return parse_index(placeholder, *args)

    if not isinstance(pd_index, pd.MultiIndex):
        placeholder = pd.Index([], dtype=pd_index.dtype, name=pd_index.name)
        return parse_index(placeholder, *args)

    # MultiIndex: keep level dtypes and names, drop all values
    empty_levels = [level[:0] for level in pd_index.levels]
    placeholder = pd.MultiIndex.from_arrays(empty_levels, names=pd_index.names)
    return parse_index(placeholder, *args)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def split_monotonic_index_min_max(
    left_min_max, left_increase, right_min_max, right_increase
):
    """
    Split the original two min_max into new min_max. Each min_max should be a list
    in which each item is a 4-tuple holding this chunk's min value,
    whether the min value is closed, the max value, and whether the max value is closed.
    The return value is a pair of nested lists; each inner list
    describes how the corresponding chunk should be split, as a list of 4-tuples.

    :param left_min_max: the left min_max (must be non-empty)
    :param left_increase: if the original data of left is increasing; when it is
        ``False`` the left result is returned in reversed chunk order
    :param right_min_max: the right min_max (must be non-empty)
    :param right_increase: if the original data of right is increasing; when it is
        ``False`` the right result is returned in reversed chunk order
    :return: nested lists in which each item indicates how min_max is split

    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
    ...                 (20, True, 22, True)]
    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
    ...                  (18, True, 19, True)]
    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
    >>> l
    [[(0, True, 2, False), (2, True, 3, True)], [(3, False, 4, False), (4, True, 6, True), (6, False, 7, False),
    (7, True, 8, True)], [(8, False, 9, True), (10, True, 12, False), (12, True, 14, True), (14, False, 18, False),
    (18, True, 18, True)], [(18, False, 19, True), (20, True, 22, True)]]
    >>> r
    [[(0, True, 2, False), (2, True, 3, True), (3, False, 4, False), (4, True, 6, True)],
    [(6, False, 7, False), (7, True, 8, True), (8, False, 9, True)], [(10, True, 12, False), (12, True, 14, True)],
    [(14, False, 18, False), (18, True, 18, True), (18, False, 19, True), (20, True, 22, True)]]
    """
    left_idx_to_min_max = [[] for _ in left_min_max]
    right_idx_to_min_max = [[] for _ in right_min_max]
    # working copies of the not-yet-emitted part of the current chunk on each side
    left_curr_min_max = list(left_min_max[0])
    right_curr_min_max = list(right_min_max[0])
    left_curr_idx = right_curr_idx = 0
    left_terminate = right_terminate = False

    while not left_terminate or not right_terminate:
        if left_terminate:
            # left is exhausted: the remaining right ranges are emitted as-is
            left_idx_to_min_max[left_curr_idx].append(tuple(right_curr_min_max))
            right_idx_to_min_max[right_curr_idx].append(tuple(right_curr_min_max))
            if right_curr_idx + 1 >= len(right_min_max):
                right_terminate = True
            else:
                right_curr_idx += 1
                right_curr_min_max = list(right_min_max[right_curr_idx])
        elif right_terminate:
            # right is exhausted: the remaining left ranges are emitted as-is
            right_idx_to_min_max[right_curr_idx].append(tuple(left_curr_min_max))
            left_idx_to_min_max[left_curr_idx].append(tuple(left_curr_min_max))
            if left_curr_idx + 1 >= len(left_min_max):
                left_terminate = True
            else:
                left_curr_idx += 1
                left_curr_min_max = list(left_min_max[left_curr_idx])
        elif left_curr_min_max[0] < right_curr_min_max[0]:
            # left min < right min
            # the piece ends at left max or just before right min, whichever
            # is smaller (lists compare by value first, then by closeness)
            right_min = [right_curr_min_max[0], not right_curr_min_max[1]]
            max_val = min(left_curr_min_max[2:], right_min)
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if left_curr_min_max[2:] == max_val:
                # left max < right min
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # emitted from left min up to (excluding) right min;
                # the remainder of left now starts at right min
                left_curr_min_max[:2] = right_curr_min_max[:2]
        elif left_curr_min_max[0] > right_curr_min_max[0]:
            # left min > right min
            left_min = [left_curr_min_max[0], not left_curr_min_max[1]]
            max_val = min(right_curr_min_max[2:], left_min)
            assert len(max_val) == 2
            min_max = (
                right_curr_min_max[0],
                right_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if right_curr_min_max[2:] == max_val:
                # right max < left min
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # emitted from right min up to (excluding) left min;
                # the remainder of right now starts at left min
                right_curr_min_max[:2] = left_curr_min_max[:2]
        else:
            # left min == right min
            max_val = min(left_curr_min_max[2:], right_curr_min_max[2:])
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if max_val == left_curr_min_max[2:]:
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # left continues right after the emitted piece
                left_curr_min_max[:2] = max_val[0], not max_val[1]
            if max_val == right_curr_min_max[2:]:
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # right continues right after the emitted piece
                right_curr_min_max[:2] = max_val[0], not max_val[1]

    if left_increase is False:
        left_idx_to_min_max = list(reversed(left_idx_to_min_max))
    if right_increase is False:
        right_idx_to_min_max = list(reversed(right_idx_to_min_max))

    return left_idx_to_min_max, right_idx_to_min_max
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def build_split_idx_to_origin_idx(splits, increase=True):
    """Map every output chunk position to ``(origin chunk index, inner position)``.

    ``splits`` holds one list per original chunk; each list has one entry per
    output chunk that the original chunk is split into, e.g.
    ``[[(0, True, 2, True), (2, False, 3, True)]]`` yields
    ``{0: (0, 0), 1: (0, 1)}``.  When ``increase`` is ``False`` the splits are
    walked in reversed order while the reported origin index still refers to
    the order in which ``splits`` was passed in.
    """
    descending = increase is False
    ordered = list(reversed(splits)) if descending else splits

    mapping = {}
    for seq_no, _ in enumerate(ordered):
        origin = len(ordered) - seq_no - 1 if descending else seq_no
        for pos in range(len(ordered[seq_no])):
            # keys are handed out consecutively starting from 0
            mapping[len(mapping)] = (origin, pos)
    return mapping
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _generate_value(dtype, fill_value):
    """Produce one placeholder value of type ``dtype`` derived from ``fill_value``.

    Arrow-backed pandas dtypes are unwrapped to their pyarrow type first.
    Nested pyarrow types are built recursively (list -> one-element list,
    map -> one ``(key, item)`` pair, struct -> dict keyed by field name);
    any other pyarrow type is produced by casting a one-element array.
    Everything else is converted through a small dispatch table, falling
    back to calling the dtype's ``type`` (or the dtype itself) on ``fill_value``.
    """
    if ArrowDtype and isinstance(dtype, ArrowDtype):
        return _generate_value(dtype.pyarrow_dtype, fill_value)

    if isinstance(dtype, pa.ListType):
        return [_generate_value(dtype.value_type, fill_value)]
    if isinstance(dtype, pa.MapType):
        map_key = _generate_value(dtype.key_type, fill_value)
        map_item = _generate_value(dtype.item_type, fill_value)
        return [(map_key, map_item)]
    if isinstance(dtype, pa.StructType):
        struct_fields = (dtype[pos] for pos in range(dtype.num_fields))
        return {
            field.name: _generate_value(field.type, fill_value)
            for field in struct_fields
        }
    if isinstance(dtype, pa.DataType):
        seed = _generate_value(dtype.to_pandas_dtype(), fill_value)
        return pa.array([seed]).cast(dtype)[0]
    if isinstance(dtype, ExternalBlobDtype):
        return SolidBlob(str(fill_value).encode())

    converters = {
        # datetime64 / timedelta64 need the pandas scalar types
        np.datetime64: pd.Timestamp,
        np.timedelta64: pd.Timedelta,
        pd.CategoricalDtype.type: lambda x: pd.CategoricalDtype([x]),
        # the real type behind object is unknown, so fall back to str
        np.object_: str,
    }
    # by default the dtype's scalar type itself performs the conversion
    scalar_type = getattr(dtype, "type", dtype)
    return converters.get(scalar_type, scalar_type)(fill_value)
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def build_empty_df(dtypes, index=None):
    """Build a placeholder DataFrame whose columns follow ``dtypes``.

    Parameters
    ----------
    dtypes : pd.Series
        Column dtypes; its index supplies the column labels.
    index : optional
        Row index of the result. When omitted (or empty) the result has no rows.

    Returns
    -------
    pd.DataFrame
        Frame with ``len(index)`` rows (0 when ``index`` is None), each row
        filled with the value generated for fill value ``1``.
    """
    columns = dtypes.index
    length = len(index) if index is not None else 0
    # always keep at least one row so that dtype casting has data to work on
    record = [[_generate_value(dtype, 1) for dtype in dtypes]] * max(1, length)

    # An empty index cannot label the single placeholder row (the DataFrame
    # constructor would raise on the length mismatch), so it is attached
    # only after the placeholder row has been trimmed away.
    build_index = index if length > 0 else None

    # duplicate column may exist,
    # so use RangeIndex first
    df = pd.DataFrame(record, columns=range(len(dtypes)), index=build_index)
    df = df.astype({i: dt for i, dt in enumerate(dtypes)})
    df.columns = columns

    if len(df) > length:
        df = df[:length]
        if index is not None:
            df = df.set_axis(index, axis=0)
    return df
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
    """Build a mock pandas DataFrame shaped like ``df_obj``.

    ``size`` and ``fill_value`` may each be a scalar or a list/tuple; they are
    paired with ``zip`` and one sub-frame of ``size`` identical rows is built
    per pair, then all sub-frames are concatenated.  For index- or series-like
    objects a single column named after the object is produced.  With
    ``ensure_string`` every object-dtype column gets ``"O"`` prepended.
    """
    from .core import INDEX_TYPE, SERIES_TYPE

    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = (
        fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]
    )

    if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE)):
        dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
    else:
        dtypes = df_obj.dtypes

    pieces = []
    for n_rows, fill in zip(sizes, fill_values):
        row = [_generate_value(dtype, fill) for dtype in dtypes]
        record = [row] * n_rows
        piece = pd.DataFrame(record)
        piece = piece.astype(dtypes.reset_index(drop=True))
        piece = piece.set_axis(dtypes.index, axis=1)

        if record:  # `columns` is empty in some cases
            if hasattr(df_obj, "index_value"):
                target_index = df_obj.index_value.to_pandas()
            else:
                target_index = df_obj.index

            if isinstance(target_index, pd.MultiIndex):
                index_val = tuple(
                    _generate_value(level.dtype, fill)
                    for level in target_index.levels
                )
                piece.index = pd.MultiIndex.from_tuples(
                    [index_val] * n_rows, names=target_index.names
                )
            else:
                index_val = _generate_value(target_index.dtype, fill)
                plain_index = pd.Index([index_val] * n_rows, name=target_index.name)
                piece.index = plain_index.astype(target_index.dtype)

        # make sure dtypes correct
        for pos, dtype in enumerate(dtypes):
            column = piece.iloc[:, pos]
            if not pd.api.types.is_dtype_equal(column.dtype, dtype):
                piece[piece.columns[pos]] = column.astype(dtype)
        pieces.append(piece)

    ret_df = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    if ensure_string:
        obj_dtypes = dtypes[dtypes == np.dtype("O")]
        ret_df[obj_dtypes.index] = ret_df[obj_dtypes.index].radd("O")
    return ret_df
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def build_empty_series(dtype, index=None, name=None):
    """Build a placeholder Series of ``dtype`` with one generated value per index entry.

    Without an index the series is empty.
    """
    n_rows = 0 if index is None else len(index)
    placeholders = [_generate_value(dtype, 1) for _ in range(n_rows)]
    return pd.Series(placeholders, dtype=dtype, index=index, name=name)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def build_series(
    series_obj=None,
    fill_value=1,
    size=1,
    name=None,
    ensure_string=False,
    dtype=None,
    index=None,
):
    """Build a mock pandas Series.

    Parameters
    ----------
    series_obj : optional
        Object to mimic. When given, its ``dtype`` overrides the ``dtype``
        argument and its index (``index_value.to_pandas()`` if available,
        otherwise ``.index``) defines the kind of index of the result.
    fill_value, size : scalar or list/tuple
        Paired with ``zip``; one segment of ``size`` repeated values is built
        per pair and the segments are concatenated.
    name : optional
        Name of the result. Only ``None`` falls back to ``series_obj.name``,
        so falsy names such as ``0`` or ``""`` are respected.
    ensure_string : bool
        For object dtype, prepend ``"O"`` to every value.
    dtype, index : optional
        Used only when ``series_obj`` is not given.

    Returns
    -------
    pd.Series
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = (
        fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]
    )

    if series_obj is not None:
        dtype = series_obj.dtype
        try:
            series_index = series_obj.index_value.to_pandas()[:0]
        except AttributeError:
            series_index = series_obj.index[:0]
    else:
        series_index = index[:0] if index is not None else None

    # `name or ...` would silently discard valid falsy names like 0 or ""
    if name is None:
        name = getattr(series_obj, "name", None)

    segments = []
    for n_rows, fill in zip(sizes, fill_values):
        segment = build_empty_series(dtype, name=name, index=series_index)
        record = _generate_value(dtype, fill)
        if isinstance(segment.index, pd.MultiIndex):
            label = tuple(
                _generate_value(level.dtype, fill) for level in segment.index.levels
            )
            segment = segment.reindex(
                index=pd.MultiIndex.from_tuples([label], names=segment.index.names)
            )
            segment.iloc[0] = record
        else:
            if isinstance(segment.index.dtype, pd.CategoricalDtype):
                label = None
            else:
                label = _generate_value(segment.index.dtype, fill)
            # enlarges the empty series by one labelled row
            segment.loc[label] = record

        segment = pd.concat([segment] * n_rows)
        # make sure dtype correct for MultiIndex
        segment = segment.astype(dtype, copy=False)
        segments.append(segment)

    ret_series = segments[0] if len(segments) == 1 else pd.concat(segments)

    if ensure_string and dtype == np.dtype("O"):
        ret_series = ret_series.radd("O")
    return ret_series
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def infer_index_value(left_index_value, right_index_value, level=None):
    """Infer the index value resulting from combining two index values.

    * two range indexes with an equal slice -> the left index value is reused;
      two range indexes with different slices -> an unknown int64 index;
    * both unique and sharing the same key -> the left index value is reused;
    * otherwise the (empty) result of joining the pandas indexes is parsed.
    """
    from .core import IndexValue

    both_ranges = isinstance(
        left_index_value.value, IndexValue.RangeIndex
    ) and isinstance(right_index_value.value, IndexValue.RangeIndex)
    if both_ranges:
        if left_index_value.value.slice == right_index_value.value.slice:
            return left_index_value
        unknown_ints = pd.Index([], dtype=np.int64)
        return parse_index(unknown_ints, left_index_value, right_index_value)

    # identical and unique indexes on both sides produce an identical output index
    same_unique = (
        left_index_value.is_unique
        and right_index_value.is_unique
        and left_index_value.key == right_index_value.key
    )
    if same_unique:
        return left_index_value

    left_index = left_index_value.to_pandas()
    right_index = right_index_value.to_pandas()
    joined = left_index.join(right_index, level=level)
    return parse_index(joined[:0], left_index_value, right_index_value)
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
    """Derive the index value obtained by indexing ``index_value`` with ``indexes``.

    A full slice returns ``index_value`` unchanged unless ``rechunk`` is set.
    When ``index_value`` holds no data, a new index value is parsed and the
    original min/max bounds are copied onto it.  Otherwise the pandas index is
    indexed directly, except for ``Entity`` indexers, which yield an unknown
    index value of the same kind.
    """
    pd_index = index_value.to_pandas()

    # when rechunk is True, the output index shall be treated
    # different from the input one
    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
        return index_value

    if not index_value.has_value():
        new_index_value = parse_index(pd_index, indexes, store_data=store_data)
        for bound in ("min_val", "min_val_close", "max_val", "max_val_close"):
            setattr(
                new_index_value._index_value, "_" + bound, getattr(index_value, bound)
            )
        return new_index_value

    if isinstance(indexes, Integral):
        return parse_index(pd_index[[indexes]], store_data=store_data)

    if isinstance(indexes, Entity):
        if isinstance(pd_index, pd.RangeIndex):
            placeholder = pd.RangeIndex(-1)
        else:
            placeholder = type(pd_index)([])
        return parse_index(placeholder, indexes, index_value, store_data=False)

    selector = list(indexes) if isinstance(indexes, tuple) else indexes
    return parse_index(pd_index[selector], store_data=store_data)
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
    """
    Merge chunk index values, visiting them in sorted order of their dict key.

    Parameters
    ----------
    to_merge_index_values : dict
        chunk index -> index_value
    store_data : bool
        when true, chunk indexes are always appended and the data is stored
        in the merged index_value

    Returns
    -------
    merged_index_value
    """
    pd_index = None
    min_val = min_val_close = max_val = max_val_close = None

    for _, chunk_value in sorted(to_merge_index_values.items()):
        if pd_index is None:
            # first chunk seeds both the index and the bounds
            pd_index = chunk_value.to_pandas()
            min_val = chunk_value.min_val
            min_val_close = chunk_value.min_val_close
            max_val = chunk_value.max_val
            max_val_close = chunk_value.max_val_close
            continue

        cur_pd_index = chunk_value.to_pandas()
        if store_data or (
            isinstance(pd_index, pd.RangeIndex)
            and isinstance(cur_pd_index, pd.RangeIndex)
            and cur_pd_index.step == pd_index.step
            and cur_pd_index.start == pd_index.stop
        ):
            # range index that is continuous
            pd_index = pd_index.append(cur_pd_index)
        else:
            pd_index = pd.Index([], dtype=pd_index.dtype)

        chunk_min = chunk_value.min_val
        if chunk_min is not None:
            try:
                if min_val is None or min_val > chunk_min:
                    min_val = chunk_min
                    min_val_close = chunk_value.min_val_close
            except TypeError:
                # bounds of different, incomparable types:
                # give up comparing for this chunk (max is skipped too)
                continue

        chunk_max = chunk_value.max_val
        if chunk_max is not None:
            if max_val is None or max_val < chunk_max:
                max_val = chunk_max
                max_val_close = chunk_value.max_val_close

    index_value = parse_index(pd_index, store_data=store_data)
    if not index_value.has_value():
        # without stored data the accumulated bounds are the only information left
        index_value._index_value._min_val = min_val
        index_value._index_value._min_val_close = min_val_close
        index_value._index_value._max_val = max_val
        index_value._index_value._max_val_close = max_val_close
    return index_value
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def is_decimal128_dtype(dtype):
    """Return True when ``dtype`` is an Arrow-backed pandas dtype wrapping decimal128.

    Returns False (instead of raising ``TypeError`` from ``isinstance``) when
    ``ArrowDtype`` is unavailable, matching the truthiness guard that
    ``_generate_value`` applies before using ``ArrowDtype``.
    """
    if not ArrowDtype:
        return False
    return isinstance(dtype, ArrowDtype) and isinstance(
        dtype.pyarrow_dtype, pa.Decimal128Type
    )
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def is_decimal256_dtype(dtype):
    """Return True when ``dtype`` is an Arrow-backed pandas dtype wrapping decimal256.

    Returns False (instead of raising ``TypeError`` from ``isinstance``) when
    ``ArrowDtype`` is unavailable, matching the truthiness guard that
    ``_generate_value`` applies before using ``ArrowDtype``.
    """
    if not ArrowDtype:
        return False
    return isinstance(dtype, ArrowDtype) and isinstance(
        dtype.pyarrow_dtype, pa.Decimal256Type
    )
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def decimal_128_to_256_dtype(dtype):
    """Widen an Arrow decimal128 dtype to decimal256 with the same precision and scale.

    Any other dtype is returned unchanged.
    """
    if is_decimal128_dtype(dtype):
        narrow = dtype.pyarrow_dtype
        return ArrowDtype(pa.decimal256(narrow.precision, narrow.scale))
    return dtype
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def safe_decimal_256_to_128_dtype(dtype):
    """Narrow an Arrow decimal256 dtype back to decimal128 when its precision is at most 38.

    Any other dtype, and decimal256 with a larger precision, is returned unchanged.
    """
    if is_decimal256_dtype(dtype) and not dtype.pyarrow_dtype.precision > 38:
        wide = dtype.pyarrow_dtype
        return ArrowDtype(pa.decimal128(wide.precision, wide.scale))
    return dtype
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def _infer_dtypes(left_dtypes, right_dtypes, operator):
    """Apply ``operator`` to two empty frames built from the dtypes and return the result's dtypes."""
    left_frame = build_empty_df(left_dtypes)
    right_frame = build_empty_df(right_dtypes)
    outcome = operator(left_frame, right_frame)
    return outcome.dtypes
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def infer_dtypes(left_dtypes, right_dtypes, operator):
    """Infer the column dtypes produced by ``operator`` on frames with the given dtypes.

    If pyarrow rejects the operation with a "Decimal precision" error, the
    inference is retried with every decimal128 column widened to decimal256,
    and the resulting dtypes are narrowed back to decimal128 where possible.
    Any other ``pa.ArrowInvalid`` is re-raised.
    """
    try:
        return _infer_dtypes(left_dtypes, right_dtypes, operator)
    except pa.ArrowInvalid as err:
        if "Decimal precision" in str(err):
            # automatic upgrade to decimal256 type and downgrade
            # to decimal128 type where possible
            left_dtypes = left_dtypes.map(decimal_128_to_256_dtype)
            right_dtypes = right_dtypes.map(decimal_128_to_256_dtype)
            widened = _infer_dtypes(left_dtypes, right_dtypes, operator)
            return widened.map(safe_decimal_256_to_128_dtype)
        raise
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def _infer_dtype(left_dtype, right_dtype, operator):
    """Apply ``operator`` to two empty series of the given dtypes and return the result's dtype."""
    left_series = build_empty_series(left_dtype)
    right_series = build_empty_series(right_dtype)
    outcome = operator(left_series, right_series)
    return outcome.dtype
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
@functools.lru_cache(100)
def infer_dtype(left_dtype, right_dtype, operator):
    """Infer the dtype produced by ``operator`` on series of the given dtypes.

    Results are memoized (up to 100 entries).  If pyarrow rejects the
    operation with a "Decimal precision" error, the inference is retried with
    decimal128 operands widened to decimal256; any other ``pa.ArrowInvalid``
    is re-raised.
    """
    try:
        return _infer_dtype(left_dtype, right_dtype, operator)
    except pa.ArrowInvalid as err:
        if "Decimal precision" in str(err):
            # automatic upgrade to decimal256 type
            wide_left = decimal_128_to_256_dtype(left_dtype)
            wide_right = decimal_128_to_256_dtype(right_dtype)
            return _infer_dtype(wide_left, wide_right, operator)
        raise
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def filter_dtypes(dtypes, column_min_max):
    """Keep the entries of ``dtypes`` whose label lies within ``column_min_max``.

    ``column_min_max`` is indexed as ``(min, min_closed, max, max_closed)``;
    a closed bound includes the bound value itself.
    """
    labels = dtypes.index
    lower_cmp = operator.ge if column_min_max[1] else operator.gt
    upper_cmp = operator.le if column_min_max[3] else operator.lt
    above_min = lower_cmp(labels, column_min_max[0])
    below_max = upper_cmp(labels, column_min_max[2])
    return dtypes[above_min & below_max]
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
def in_range_index(i, pd_range_index):
    """
    Tell whether ``i`` is one of the values generated by ``pd_range_index`` (a pd.RangeIndex).
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    if step > 0:
        # ascending range: inside [start, stop) and on the step grid
        return bool(start <= i < stop and (i - start) % step == 0)
    if step < 0:
        # descending range: inside (stop, start] and on the step grid
        return bool(start >= i > stop and (start - i) % step == 0)
    return False
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
def wrap_notimplemented_exception(func):
    """Decorate ``func`` so that a raised ``NotImplementedError`` becomes a returned ``NotImplemented``."""

    @functools.wraps(func)
    def _guarded(*call_args, **call_kwargs):
        try:
            outcome = func(*call_args, **call_kwargs)
        except NotImplementedError:
            outcome = NotImplemented
        return outcome

    return _guarded
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def validate_axis(axis, tileable=None):
    """Normalize ``axis`` to a non-negative integer.

    ``"index"`` maps to 0 and ``"columns"`` to 1; anything else must be
    usable as an integer index.  Negative values, values that are not
    integer-like, and (when ``tileable`` is given) values not below
    ``tileable.ndim`` raise ``ValueError``.
    """
    if axis == "index":
        axis = 0
    elif axis == "columns":
        axis = 1

    try:
        axis = operator.index(axis)
        legal = axis >= 0 and (tileable is None or axis < tileable.ndim)
    except TypeError:
        legal = False

    if not legal:
        raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
    return axis
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
):  # pragma: no cover
    """Translate mixed ``(index, columns)`` / ``(arg, axis=...)`` call styles.

    Every accepted combination is converted into ``{index=..., columns=...}``
    style keyword arguments.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A new dictionary of keyword arguments; the ``kwargs`` passed in is
        not modified, so callers should update theirs with the return value.

    Raises
    ------
    TypeError
        On conflicting or ambiguous argument combinations.
    """
    translated = {}
    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]

    # consistency check: `axis` cannot be combined with index / columns
    if "axis" in kwargs and any(x in kwargs for x in axes_names):
        msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
        raise TypeError(msg)

    # explicit `arg_name=...` provided by the user
    if arg_name in kwargs:
        if args:
            msg = f"{method_name} got multiple values for argument '{arg_name}'"
            raise TypeError(msg)

        axis = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        translated[axis] = kwargs[arg_name]

    # keyword arguments that are themselves axis names
    for key, value in kwargs.items():
        try:
            axis_name = axes_names[validate_axis(key, data)]
        except ValueError:
            continue
        translated[axis_name] = value

    # finally the positional arguments; nothing to do when there are none
    # (it's up to the calling function to decide whether that is valid)
    n_positional = len(args)
    if n_positional == 1:
        axis = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        translated[axis] = args[0]
    elif n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
            raise TypeError(msg)

        msg = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity."
        )
        raise TypeError(msg.format(method_name=method_name))
    elif n_positional > 2:
        msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
        raise TypeError(msg)
    return translated
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
def validate_output_types(**kwargs):
    """Collect the requested output types from keyword arguments.

    ``output_types`` wins when truthy; otherwise a single ``object_type`` /
    ``output_type`` is wrapped in a list.  String entries are resolved to the
    ``OutputType`` member with the lower-cased name.  Returns ``None`` when
    nothing was requested.
    """
    from ..core import OutputType

    single = kwargs.get("object_type") or kwargs.get("output_type")
    requested = kwargs.get("output_types")
    if not requested:
        requested = None if single is None else [single]
    if not requested:
        return None
    return [
        getattr(OutputType, item.lower()) if isinstance(item, str) else item
        for item in requested
    ]
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
    """
    Fetch corner DataFrame or Series for repr usage.

    The pandas options ``display.max_rows`` / ``display.min_rows`` decide
    whether only the head and tail rows are fetched.  Both options may be
    ``None``: ``max_rows=None`` means the repr is never truncated, so the
    whole object is fetched; ``min_rows=None`` follows ``max_rows``.

    :param df_or_series: DataFrame or Series
    :param session: session passed through to the fetch calls
    :return: corner DataFrame
    """
    from .indexing.iloc import iloc

    max_rows = pd.get_option("display.max_rows")
    try:
        min_rows = pd.get_option("display.min_rows")
    except KeyError:  # pragma: no cover
        # display.min_rows is introduced in pandas 0.25
        min_rows = None

    if max_rows is None:
        # no truncation at all: comparing row counts with None would raise
        return df_or_series._fetch(session=session)
    min_rows = max_rows if min_rows is None else min(min_rows, max_rows)

    n_rows = df_or_series.shape[0]
    if not (n_rows > max_rows and n_rows > min_rows // 2 * 2 + 2):
        return df_or_series._fetch(session=session)

    # for pandas, greater than max_rows
    # will display min_rows
    # thus we fetch min_rows + 2 lines
    index_size = min_rows // 2 + 1
    head = iloc(df_or_series)[:index_size]
    tail = iloc(df_or_series)[-index_size:]
    head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
    xdf = cudf if head.op.is_gpu() else pd
    return xdf.concat([head_data, tail_data], axis="index")
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
class ReprSeries(pd.Series):
    """
    pandas Series used to render a partially fetched ("corner") preview.

    The instance holds only the fetched corner data, while ``len()`` reports
    the first entry of ``real_shape``.
    """

    def __init__(self, corner_data, real_shape):
        super().__init__(corner_data)
        # remembered so that __len__ can report it
        self._real_shape = real_shape

    def __len__(self):
        # Only corner data is held for the repr, so the natural length would
        # be wrong and cannot be controlled otherwise; report the real one.
        total_rows = self._real_shape[0]
        return total_rows
|
|
1038
|
+
|
|
1039
|
+
|
|
1040
|
+
def filter_dtypes_by_index(dtypes, index):
    """
    Select the entries of ``dtypes`` whose labels appear in ``index``.

    A plain ``.loc`` lookup (followed by ``dropna``) is tried first. If that
    raises ``KeyError``, the labels common to both indexes are computed with
    a frame merge and used for the lookup instead; the index names of
    ``dtypes`` are restored on that result.

    :param dtypes: pandas Series of dtypes
    :param index: pandas Index with the wanted labels
    :return: filtered dtypes Series
    """
    try:
        return dtypes.loc[index].dropna()
    except KeyError:
        own_labels = dtypes.index.to_frame()
        wanted_labels = index.to_frame()
        shared = own_labels.merge(wanted_labels)
        level_positions = list(range(dtypes.index.nlevels))
        shared_index = shared.set_index(level_positions).index
        selected = dtypes.loc[shared_index]
        selected.index.names = dtypes.index.names
        return selected
|
|
1053
|
+
|
|
1054
|
+
|
|
1055
|
+
@contextmanager
def create_sa_connection(con, **kwargs):
    """
    Context manager yielding a SQLAlchemy connection for ``con``.

    * ``Connection``: yielded as is and left untouched on exit.
    * ``Engine``: a new connection is opened and closed on exit; the engine
      itself is not disposed.
    * anything else: passed to ``sqlalchemy.create_engine`` together with
      ``kwargs``; the connection is closed and the engine disposed on exit.

    Resources created here are released even when opening or closing the
    connection fails.

    :param con: Connection, Engine, or an argument for ``create_engine``
    :param kwargs: extra arguments for ``create_engine``
    """
    import sqlalchemy as sa
    from sqlalchemy.engine import Connection, Engine

    if isinstance(con, Connection):
        # connection created by the user: ownership stays with the caller
        yield con
        return

    owned_engine = None
    if not isinstance(con, Engine):
        owned_engine = sa.create_engine(con, **kwargs)
        con = owned_engine

    try:
        # inside the try so an engine created above is disposed if this raises
        connection = con.connect()
        try:
            yield connection
        finally:
            connection.close()
    finally:
        if owned_engine is not None:
            owned_engine.dispose()
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def wrap_arrow_type(arrow_type):
    """
    Wrap an arrow data type into a pandas extension dtype.

    A type equal to ``pa.string()`` becomes ``pd.StringDtype("pyarrow")``;
    every other type is wrapped with ``ArrowDtype``.
    """
    is_plain_string = arrow_type == pa.string()
    return pd.StringDtype("pyarrow") if is_plain_string else ArrowDtype(arrow_type)
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
def to_arrow_dtypes(dtypes):
    """
    Convert dtypes to their arrow-backed counterparts.

    ``dtypes`` may be a ``pa.Schema`` (the pandas dtypes are then taken from
    an empty table built from it) or pandas dtypes (the arrow schema is then
    derived with ``pandas_dtypes_to_arrow_schema``). Entries that already are
    pandas extension dtypes are kept; every other entry is replaced by
    ``wrap_arrow_type`` of the arrow type at the same position.

    :return: a copy of the dtypes with the replacements applied
    """
    from ..io.odpsio.schema import pandas_dtypes_to_arrow_schema

    if not isinstance(dtypes, pa.Schema):
        arrow_schema = pandas_dtypes_to_arrow_schema(dtypes)
    else:
        arrow_schema = dtypes
        dtypes = arrow_schema.empty_table().to_pandas().dtypes

    converted = dtypes.copy()
    for pos in range(len(dtypes)):
        arrow_type = arrow_schema.types[pos]
        current = dtypes.iloc[pos]
        # make existing extension dtype consistent, wrap everything else
        keep_existing = isinstance(current, pd.api.extensions.ExtensionDtype)
        converted.iloc[pos] = current if keep_existing else wrap_arrow_type(arrow_type)
    return converted
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def is_dataframe(x):
    """Return True if ``x`` is a pandas DataFrame, or a cudf DataFrame when ``cudf`` is not None."""
    accepted = [pd.DataFrame]
    if cudf is not None:  # pragma: no cover
        accepted.append(cudf.DataFrame)
    return isinstance(x, tuple(accepted))
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def is_series(x):
    """Return True if ``x`` is a pandas Series, or a cudf Series when ``cudf`` is not None."""
    accepted = [pd.Series]
    if cudf is not None:  # pragma: no cover
        accepted.append(cudf.Series)
    return isinstance(x, tuple(accepted))
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def is_index(x):
    """Return True if ``x`` is a pandas Index, or a cudf Index when ``cudf`` is not None."""
    accepted = [pd.Index]
    if cudf is not None:  # pragma: no cover
        accepted.append(cudf.Index)
    return isinstance(x, tuple(accepted))
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def get_xdf(x):
    """
    Return the dataframe module matching ``x``.

    ``cudf`` is returned when it is not None and ``x`` is one of its
    DataFrame / Series / Index types; otherwise ``pd`` is returned.
    """
    gpu_backed = cudf is not None and isinstance(  # pragma: no cover
        x, (cudf.DataFrame, cudf.Series, cudf.Index)
    )
    return cudf if gpu_backed else pd
|
|
1137
|
+
|
|
1138
|
+
|
|
1139
|
+
def is_cudf(x):
    """Return True only when ``cudf`` is not None and ``x`` is a cudf DataFrame, Series or Index."""
    if cudf is None:
        return False
    gpu_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return isinstance(x, gpu_types)  # pragma: no cover
|
|
1144
|
+
|
|
1145
|
+
|
|
1146
|
+
def whether_to_clean_up(op, threshold):
    """
    Decide whether the function held by ``op`` references enough data to
    need cleanup.

    The objects reachable from ``op.func`` (its closure cells, or the
    ``__dict__`` / ``__slots__`` of a callable object) are measured with a
    ``sys.getsizeof``-based estimate. As soon as the running total reaches
    ``threshold`` the scan stops and ``op.need_clean_up_func`` is set to True.

    :param op: object exposing ``func`` and ``need_clean_up_func``
    :param threshold: limit compared against the running size total
    :return: the value of ``op.need_clean_up_func`` after the check
    """
    func = op.func
    counted_bytes = 0
    # inner_count() stops descending once its depth counter exceeds this value
    max_recursion_depth = 2

    from collections import deque
    from numbers import Number

    # Instances of these classes are measured with sys.getsizeof only and are
    # never traversed.
    BYPASS_CLASSES = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)

    class GetSizeEarlyStopException(Exception):
        # raised to leave the scan as soon as the threshold is reached
        pass

    def check_exceed_threshold():
        nonlocal threshold, counted_bytes
        if counted_bytes >= threshold:
            raise GetSizeEarlyStopException()

    def getsize(obj_outer):
        # ids seen during this getsize() call; a fresh set is used per call,
        # so deduplication does not span separate calls
        _seen_obj_ids = set()

        def inner_count(obj, recursion_depth):
            obj_id = id(obj)
            # already-counted objects and too deeply nested ones contribute 0
            if obj_id in _seen_obj_ids or recursion_depth > max_recursion_depth:
                return 0
            _seen_obj_ids.add(obj_id)
            recursion_depth += 1
            size = sys.getsizeof(obj)
            if isinstance(obj, BYPASS_CLASSES):
                return size
            elif isinstance(obj, (tuple, list, set, deque)):
                size += sum(inner_count(i, recursion_depth) for i in obj)
            elif hasattr(obj, "items"):
                # anything exposing items() is treated as a mapping
                size += sum(
                    inner_count(k, recursion_depth) + inner_count(v, recursion_depth)
                    for k, v in getattr(obj, "items")()
                )
            # attribute storage is counted in addition to the container content
            if hasattr(obj, "__dict__"):
                size += inner_count(vars(obj), recursion_depth)
            if hasattr(obj, "__slots__"):
                size += sum(
                    inner_count(getattr(obj, s), recursion_depth)
                    for s in obj.__slots__
                    if hasattr(obj, s)
                )
            return size

        return inner_count(obj_outer, 0)

    try:
        # Note: In most cases, func is just a function with closure, while chances are that
        # func is a callable that doesn't have __closure__ attribute.
        if inspect.isclass(func):
            # classes are not inspected
            pass
        elif hasattr(func, "__closure__") and func.__closure__ is not None:
            for cell in func.__closure__:
                counted_bytes += getsize(cell.cell_contents)
                check_exceed_threshold()
        elif callable(func):
            if hasattr(func, "__dict__"):
                for k, v in func.__dict__.items():
                    counted_bytes += sum([getsize(k), getsize(v)])
                    check_exceed_threshold()
            if hasattr(func, "__slots__"):
                for slot in func.__slots__:
                    counted_bytes += (
                        getsize(getattr(func, slot)) if hasattr(func, slot) else 0
                    )
                    check_exceed_threshold()
    except GetSizeEarlyStopException:
        logger.debug("Func needs cleanup.")
        op.need_clean_up_func = True
    else:
        # the threshold was not reached: the flag is expected to still be False
        assert op.need_clean_up_func is False
        logger.debug("Func doesn't need cleanup.")

    return op.need_clean_up_func
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
def concat_on_columns(objs: List) -> Any:
    """
    Concatenate ``objs`` along the column axis with the module matching the
    first element (as chosen by ``get_xdf``).

    :param objs: non-empty list of objects to concatenate
    :return: the concatenated object
    """
    first = objs[0]
    xdf = get_xdf(first)
    combined = xdf.concat(objs, axis=1)
    # In cudf, concat with axis=1 and ignore_index=False by default behaves
    # opposite to pandas: cudf would reset the index, which does not match
    # its documentation. Therefore the index of the first object is put back.
    if xdf is cudf:
        combined.index = first.index
    return combined
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable(obj, **kwargs)`` when it is callable.

    :return: the call result, or ``maybe_callable`` itself when it is not callable
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
def patch_sa_engine_execute():
    """
    Add an ``execute`` method to SQLAlchemy's ``Engine`` when it lacks one.

    pandas did not resolve the compatibility issue with sqlalchemy 2.0, see
    https://github.com/pandas-dev/pandas/issues/40686. Patching the Engine
    class lets our code keep working. Nothing happens when SQLAlchemy cannot
    be imported or ``Engine`` already has ``execute``.
    """
    try:
        from sqlalchemy.engine import Engine
    except ImportError:  # pragma: no cover
        return

    if hasattr(Engine, "execute"):  # pragma: no cover
        return

    def execute(self, statement, *multiparams, **params):
        # run the statement on a connection opened from this engine
        return self.connect().execute(statement, *multiparams, **params)

    Engine.execute = execute
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
def bind_func_args_from_pos(func, args_bind_position, *bound_args, **bound_kwargs):
    """
    Create a new function with arguments bound from specified position.

    Parameters
    ----------
    func : callable
        Target function to be wrapped.
    args_bind_position : int
        Position (0-based) at which ``bound_args`` are inserted into the
        positional arguments given at call time.
        e.g., 0 puts them before the first runtime argument, 1 puts them
        after the first runtime argument.
    *bound_args : tuple
        Positional arguments to be inserted at ``args_bind_position``.
    **bound_kwargs : dict
        Keyword arguments to be bound. Keyword arguments given at call time
        take precedence over them.

    Returns
    -------
    callable
        Wrapped function with bound arguments.

    Examples
    --------
    >>> def func(x, y, z=0):
    ...     return x * y + z
    >>> f = bind_func_args_from_pos(func, 1, 10)  # bind from second position
    >>> f(5)  # equals func(5, 10)
    50

    Raises
    ------
    Exception
        Nothing is validated when the wrapper is created. An exception raised
        while calling the wrapper is re-raised as the same type with a message
        naming ``func`` and chained to the original. When the exception type
        cannot be built from a single message, the original exception is
        re-raised unchanged.
    """

    @functools.wraps(func)
    def wrapper(*runtime_args, **runtime_kwargs):
        try:
            # Combine arguments
            all_args = (
                runtime_args[:args_bind_position]
                + bound_args
                + runtime_args[args_bind_position:]
            )
            all_kwargs = {**bound_kwargs, **runtime_kwargs}

            return func(*all_args, **all_kwargs)
        except Exception as exc:
            # Enhance error message with context. Partials and callable
            # objects may not have __name__, so fall back to their repr.
            func_name = getattr(func, "__name__", None) or repr(func)
            message = f"Error calling {func_name} with bound arguments: {exc}"
            try:
                enhanced = type(exc)(message)
            except Exception:
                # e.g. UnicodeDecodeError needs several constructor arguments
                enhanced = None
            if enhanced is None:
                # keep the real error instead of masking it
                raise
            raise enhanced from exc

    return wrapper
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
    """
    Pack the funcs with args and kwargs to avoid the ambiguity between other
    positional and keyword arguments. It will process the funcs by the following rule:

    1. If there's no such args and kwargs, return funcs itself.

    2. If the funcs is a dict-like object, it will iterate each key-value pair, pack the
    value recursively, and return a new dict with the same keys and packed values.

    3. If the funcs is a list-like object, it will iterate each element, pack it
    recursively, and return a new list with the packed elements.

    4. If the funcs is a str object, it is resolved with ``get_callable_by_name``:
    the attribute df.funcs is tried first; if it exists and is a callable, it is
    used. If it exists but isn't a callable, a ValueError is raised. If it doesn't
    exist, the attribute np.funcs is used when it exists and df provides
    ``__array__``, otherwise an AttributeError is raised. This rule is almost the
    same with pandas.

    5. Other cases are treated as funcs being a callable, returns the wrapped one with
    args and kwargs packed in.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        The DataFrame or Series object to test the function.
    funcs : function, str, list-like or dict-like
        Function to pack. It should have the same type with Dataframe.transform().
    args_bind_position: int
        Position (0-based) at which ``args`` are inserted into the positional
        arguments of every packed function, see ``bind_func_args_from_pos``.
        It is applied to nested dict-like / list-like entries as well.
    *args :
        The positional arguments to func. If funcs contains many functions, each one
        should be able to accept *args.
    **kwargs :
        The keyword arguments to func. If funcs contains many functions, each one
        should be able to accept **kwargs.

    Returns
    -------
    The packed functions having the same structure with funcs.

    Raises
    ------
    ValueError :
        If there's a string whose attribute on df exists but is not callable.
    AttributeError :
        If there's a string but no corresponding function is found.
    """
    from ..udf import MarkedFunction

    if not args and not kwargs:
        return funcs

    if is_dict_like(funcs):
        # forward args_bind_position, otherwise nested functions would
        # silently fall back to the default position
        return {
            k: pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for k, v in funcs.items()
        }

    if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
        return [
            pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for v in funcs
        ]

    f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs

    if isinstance(f, MarkedFunction):
        # for marked function, pack the inner function, and reset as mark function
        packed_func = f.copy()
        packed_func.func = bind_func_args_from_pos(
            f.func, args_bind_position, *args, **kwargs
        )
    else:
        packed_func = bind_func_args_from_pos(f, args_bind_position, *args, **kwargs)

    # Callable
    return packed_func
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
def get_callable_by_name(df: Any, func_name: str) -> Callable:
    """
    Resolve a function name to a callable.

    The attribute ``func_name`` of ``df`` is tried first: a callable attribute
    is returned, a non-callable one raises ValueError. When ``df`` has no such
    attribute, the attribute of the same name on ``np`` is returned provided
    it exists and ``df`` has ``__array__``. This rule is almost the same with
    pandas.

    Parameters
    ----------
    df: pandas.Series or pandas.Dataframe
        The receiver of the func name.
    func_name : str
        The func name.

    Returns
    -------
    The callable instance.

    Raises
    ------
    ValueError :
        If the attribute found on df is not callable.
    AttributeError :
        If no corresponding function is found.
    """
    if not hasattr(df, func_name):
        if hasattr(np, func_name) and hasattr(df, "__array__"):
            return getattr(np, func_name)
        raise AttributeError(
            f"'{func_name}' is not a valid function for '{type(df).__name__}' object"
        )

    candidate = getattr(df, func_name)
    if not callable(candidate):
        raise ValueError(f"{func_name} is not a callable")
    return candidate
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
@dataclasses.dataclass
class InferredDataFrameMeta:
    """Metadata describing the result of a function applied to DataFrame-like data."""

    # kind of the result; compared against OutputType.dataframe below
    output_type: OutputType
    # per-column dtypes, only checked for dataframe results
    dtypes: Optional[pd.Series] = None
    # dtype and name, only checked for non-dataframe results
    dtype: Optional[Any] = None
    name: Optional[str] = None
    index_value: Optional["IndexValue"] = None
    maybe_agg: bool = False
    elementwise: bool = False

    def check_absence(self, *args: str) -> None:
        """
        Raise TypeError when any of the named fields is still None.

        Fields that do not apply to the current output type are ignored:
        ``dtype`` and ``name`` for dataframes, ``dtypes`` for everything else.

        :param args: names of the fields that must be present
        """
        if self.output_type == OutputType.dataframe:
            not_applicable = {"dtype", "name"}
        else:
            not_applicable = {"dtypes"}

        missing = [
            field_name
            for field_name in sorted(set(args) - not_applicable)
            if getattr(self, field_name) is None
        ]
        if not missing:
            return
        raise TypeError(
            f"Cannot determine {', '.join(missing)} by calculating "
            "with mock data, please specify it as arguments"
        )
|
|
1461
|
+
|
|
1462
|
+
|
|
1463
|
+
def _get_groupby_input_df(groupby):
    """
    Follow the first input of ``groupby`` repeatedly until an object whose
    operator's first output type is a dataframe or a series is reached.

    :return: that dataframe or series object
    """
    data_types = (OutputType.dataframe, OutputType.series)
    node = groupby
    while node.op.output_types[0] not in data_types:
        node = node.inputs[0]
    return node
|
|
1468
|
+
|
|
1469
|
+
|
|
1470
|
+
def infer_dataframe_return_value(
    df_obj,
    func,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    inherit_index=False,
    build_kw=None,
    elementwise=None,
    skip_infer=False,
) -> InferredDataFrameMeta:
    """
    Infer the metadata of the result of applying ``func`` to ``df_obj``.

    Information is collected, in this order, from the explicit arguments, from
    the output meta returned by ``get_function_output_meta`` for the unwrapped
    function, and finally by calling ``func`` on mock data built from
    ``df_obj``. Explicit arguments are never overridden by inferred values.

    :param df_obj: input DataFrame, Series or groupby object
    :param func: function to be applied, possibly a MarkedFunction, a
        ``functools.partial`` or a wrapper exposing ``__wrapped__``
    :param output_type: expected output type, if known
    :param dtypes: expected dtypes of a dataframe result, if known
    :param dtype: expected dtype of a non-dataframe result, if known
    :param name: expected name of a non-dataframe result, if known
    :param index: index (or index object) of the result, if known
    :param inherit_index: whether the result keeps the index of ``df_obj``
    :param build_kw: keyword arguments for building the mock data
    :param elementwise: whether ``func`` works elementwise; when None it is
        taken to be True only for ``np.ufunc`` instances
    :param skip_infer: when True, ``func`` is never called on mock data
    :return: the collected metadata; when calling ``func`` on mock data
        fails, the failure is logged and the metadata known so far is returned
    """
    from .core import GROUPBY_TYPE, INDEX_TYPE
    from .typing_ import get_function_output_meta

    # peel MarkedFunction / functools.partial / __wrapped__ layers
    unwrapped_func = func
    if isinstance(unwrapped_func, MarkedFunction):
        unwrapped_func = unwrapped_func.func
    while True:
        if isinstance(unwrapped_func, functools.partial):
            unwrapped_func = unwrapped_func.func
        elif hasattr(unwrapped_func, "__wrapped__"):
            unwrapped_func = unwrapped_func.__wrapped__
        else:
            break

    # explicit arguments win over what the function output meta provides
    func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
    func_index_value = None
    if func_annotation_meta:
        output_type = output_type or func_annotation_meta.output_type
        dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
        dtype = dtype if dtype is not None else func_annotation_meta.dtype
        name = name if name is not None else func_annotation_meta.name
        func_index_value = func_annotation_meta.index_value

    if skip_infer:
        if isinstance(index, INDEX_TYPE):
            ret_index_value = index.index_value
        elif index is not None:
            ret_index_value = parse_index(index, df_obj.key)
        else:
            ret_index_value = func_index_value

        return InferredDataFrameMeta(
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index_value=ret_index_value,
        )

    if isinstance(index, INDEX_TYPE):
        index = index.index_value

    if elementwise is None:
        elementwise = isinstance(unwrapped_func, np.ufunc)

    ret_index_value = func_index_value
    if output_type is not None and (dtypes is not None or dtype is not None):
        if inherit_index:
            ret_index_value = df_obj.index_value
        elif index is not None:
            ret_index_value = parse_index(index)

        # NOTE(review): nesting reconstructed from a source without
        # indentation; assumed to belong to the enclosing ``if`` - confirm.
        if ret_index_value is not None:
            # everything is known already, no need to run func on mock data
            return InferredDataFrameMeta(
                output_type,
                dtypes,
                dtype,
                name,
                ret_index_value,
                elementwise=elementwise or False,
            )

    ret_output_type = None
    ret_dtypes = dtypes
    maybe_agg = False
    build_kw = build_kw or {}
    obj_key = df_obj.key

    if elementwise:
        inherit_index = True
        (ret_output_type,) = get_output_types(df_obj)
    # NOTE(review): assumed to apply regardless of ``elementwise``; nesting
    # reconstructed from a source without indentation - confirm.
    if index is not None:
        ret_index_value = parse_index(index)

    if isinstance(df_obj, GROUPBY_TYPE):
        is_groupby = True
        empty_df_obj = df_obj.op.build_mock_groupby(**build_kw)
    else:
        is_groupby = False
        empty_df_obj = (
            build_df(df_obj, **build_kw)
            if df_obj.ndim == 2
            else build_series(df_obj, **build_kw)
        )
    try:
        with np.errstate(all="ignore"), quiet_stdio():
            infer_df_obj = func(empty_df_obj)

        if ret_index_value is None:
            if (
                infer_df_obj is None
                or not hasattr(infer_df_obj, "index")
                or infer_df_obj.index is None
            ):
                ret_index_value = parse_index(pd.RangeIndex(-1))
            elif (
                infer_df_obj.index is getattr(empty_df_obj, "index", None)
                or inherit_index
            ):
                ret_index_value = df_obj.index_value
            else:
                ret_index_value = parse_index(infer_df_obj.index, obj_key, func)

        # The TypeErrors below are raised inside this ``try`` and are
        # therefore handled by the bare ``except`` at the end as well.
        if isinstance(infer_df_obj, pd.DataFrame):
            if output_type is not None and output_type != OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "series", '
                    f'please specify `output_type` as "dataframe"'
                )
            ret_output_type = ret_output_type or OutputType.dataframe
            if ret_dtypes is None:
                ret_dtypes = infer_df_obj.dtypes
        else:
            if output_type is not None and output_type == OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "dataframe", '
                    f'please specify `output_type` as "series"'
                )
            ret_output_type = ret_output_type or OutputType.series
            # Only fill in what the caller left unspecified. ``is None``
            # checks keep explicit falsy values such as ``name=0``, in line
            # with how the function output meta is merged above.
            if name is None:
                name = getattr(infer_df_obj, "name", None)
            if dtype is None:
                dtype = infer_df_obj.dtype

        if is_groupby and len(infer_df_obj) <= 2:
            # we create mock df with 4 rows, 2 groups
            # if return df has 2 rows, we assume that
            # it's an aggregation operation
            maybe_agg = True

        return InferredDataFrameMeta(
            ret_output_type,
            make_dtypes(ret_dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
    except:  # noqa: E722  # nosec
        # best effort: report what is known instead of failing
        logger.info(
            "Exception raised while inferring meta of function result", exc_info=True
        )
        return InferredDataFrameMeta(
            output_type,
            make_dtypes(dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
|
|
1633
|
+
|
|
1634
|
+
|
|
1635
|
+
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
    """
    Copy the scheduling hints of ``func`` onto ``op``.

    For a ``MarkedFunction`` the expected engine, resources, gpu flag and
    fs_mount are read from it, and resource keys that are missing or None are
    filled from ``options.function.default_running_options``. For any other
    function only the default running options are used. The resources are
    then passed through ``validate_and_adjust_resource_ratio`` and only
    truthy hints are assigned to ``op``.

    The resource dicts of ``func`` and of the global options are copied
    before being modified, so neither is changed by this call.

    :param func: function to read the hints from
    :param op: operator receiving the hints
    """
    from ..config import options

    expect_engine = None
    expect_gpu = None
    fs_mount = None
    default_options = options.function.default_running_options or {}

    if isinstance(func, MarkedFunction):
        # copy from marked function
        expect_engine = func.expect_engine
        # work on a copy: the merge below must not write into func's own dict
        expect_resources = dict(func.expect_resources or {})
        expect_gpu = func.gpu
        fs_mount = func.fs_mount

        # merge default options if not set
        for key, value in default_options.items():
            if expect_resources.get(key) is None:
                expect_resources[key] = value
    else:
        # copy from default options; a real copy keeps the later "gu_quota"
        # assignment out of the global options
        expect_resources = dict(default_options)

    # Validate and adjust resource ratio constraints on client side
    expect_resources, _ = validate_and_adjust_resource_ratio(
        expect_resources,
        max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
        adjust=True,
    )

    # If GPU is required but gu_quota not set, inherit from global setting
    if expect_resources.get("gpu"):
        expect_resources["gu_quota"] = expect_resources.get(
            "gu_quota", [options.session.gu_quota_name]
        )

    if expect_engine:
        op.expect_engine = expect_engine
    if expect_resources:
        op.expect_resources = expect_resources
    if expect_gpu:
        op.gpu = expect_gpu
    if fs_mount:
        op.fs_mount = fs_mount
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
def make_column_list(col, dtypes_or_columns, level=None):
|
|
1682
|
+
"""Returns [col] if col is a column in dtypes"""
|
|
1683
|
+
try:
|
|
1684
|
+
if isinstance(dtypes_or_columns, pd.Series):
|
|
1685
|
+
idx = dtypes_or_columns.index
|
|
1686
|
+
else:
|
|
1687
|
+
idx = dtypes_or_columns
|
|
1688
|
+
|
|
1689
|
+
if level is None:
|
|
1690
|
+
if col in idx:
|
|
1691
|
+
return [col]
|
|
1692
|
+
elif isinstance(col, int):
|
|
1693
|
+
col = [col]
|
|
1694
|
+
if all(c in idx for c in col):
|
|
1695
|
+
return col
|
|
1696
|
+
if all(isinstance(c, int) for c in col):
|
|
1697
|
+
return [idx[c] for c in col]
|
|
1698
|
+
return col
|
|
1699
|
+
else:
|
|
1700
|
+
level_idx = idx.get_level_values(level)
|
|
1701
|
+
if isinstance(col, list):
|
|
1702
|
+
cols = col
|
|
1703
|
+
else:
|
|
1704
|
+
cols = [col]
|
|
1705
|
+
mask = level_idx.isin(cols)
|
|
1706
|
+
if not mask.any():
|
|
1707
|
+
mask = col
|
|
1708
|
+
return idx[mask]
|
|
1709
|
+
except (IndexError, TypeError, ValueError):
|
|
1710
|
+
return col
|
|
1711
|
+
|
|
1712
|
+
|
|
1713
|
+
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """
    Call ``groupby`` on ``df_or_series`` with the given parameters.

    The optional ``"selection"`` entry is not passed to ``groupby``; when it
    is truthy it is used to index the groupby result. ``groupby_params``
    itself is left unchanged.

    :param df_or_series: object providing ``groupby``
    :param groupby_params: keyword arguments for ``groupby`` plus an optional
        ``"selection"`` entry
    :return: the groupby object, indexed by the selection when given
    """
    groupby_kwargs = groupby_params.copy()
    selection = groupby_kwargs.pop("selection", None)
    grouped = df_or_series.groupby(**groupby_kwargs)
    return grouped[selection] if selection else grouped
|
|
1720
|
+
|
|
1721
|
+
|
|
1722
|
+
def validate_dtype_backend(value):
    """
    Validate a dtype backend setting.

    Booleans are accepted for compatibility with the legacy use_arrow_dtype
    property: True maps to "pyarrow" and False to "numpy".

    :return: the (possibly translated) value
    :raises ValueError: if ``dtype_backend_validator`` rejects the value
    """
    if isinstance(value, bool):
        # compatibility for legacy use_arrow_dtype property
        value = {True: "pyarrow", False: "numpy"}[value]
    if dtype_backend_validator(value):
        return value
    raise ValueError(f"Invalid dtype_backend: {value}")
|