maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
maxframe/utils.py
ADDED
|
@@ -0,0 +1,1774 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import asyncio.events
|
|
16
|
+
import concurrent.futures
|
|
17
|
+
import contextlib
|
|
18
|
+
import contextvars
|
|
19
|
+
import copy
|
|
20
|
+
import dataclasses
|
|
21
|
+
import datetime
|
|
22
|
+
import enum
|
|
23
|
+
import functools
|
|
24
|
+
import importlib
|
|
25
|
+
import inspect
|
|
26
|
+
import io
|
|
27
|
+
import itertools
|
|
28
|
+
import logging
|
|
29
|
+
import math
|
|
30
|
+
import numbers
|
|
31
|
+
import os
|
|
32
|
+
import pkgutil
|
|
33
|
+
import random
|
|
34
|
+
import re
|
|
35
|
+
import struct
|
|
36
|
+
import sys
|
|
37
|
+
import tempfile
|
|
38
|
+
import threading
|
|
39
|
+
import time
|
|
40
|
+
import tokenize as pytokenize
|
|
41
|
+
import types
|
|
42
|
+
import warnings
|
|
43
|
+
import weakref
|
|
44
|
+
import zlib
|
|
45
|
+
from collections.abc import Hashable, Mapping
|
|
46
|
+
from contextlib import contextmanager
|
|
47
|
+
from typing import (
|
|
48
|
+
Any,
|
|
49
|
+
Awaitable,
|
|
50
|
+
Callable,
|
|
51
|
+
Dict,
|
|
52
|
+
Generator,
|
|
53
|
+
Iterable,
|
|
54
|
+
List,
|
|
55
|
+
Optional,
|
|
56
|
+
Tuple,
|
|
57
|
+
Type,
|
|
58
|
+
TypeVar,
|
|
59
|
+
Union,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
import msgpack
|
|
63
|
+
import numpy as np
|
|
64
|
+
import pandas as pd
|
|
65
|
+
import traitlets
|
|
66
|
+
from tornado import httpclient, web
|
|
67
|
+
from tornado.simple_httpclient import HTTPTimeoutError
|
|
68
|
+
|
|
69
|
+
from ._utils import ( # noqa: F401 # pylint: disable=unused-import
|
|
70
|
+
NamedType,
|
|
71
|
+
Timer,
|
|
72
|
+
TypeDispatcher,
|
|
73
|
+
ceildiv,
|
|
74
|
+
get_user_call_point,
|
|
75
|
+
new_random_id,
|
|
76
|
+
register_tokenizer,
|
|
77
|
+
reset_id_random_seed,
|
|
78
|
+
to_binary,
|
|
79
|
+
to_str,
|
|
80
|
+
to_text,
|
|
81
|
+
tokenize,
|
|
82
|
+
tokenize_int,
|
|
83
|
+
)
|
|
84
|
+
from .lib.dtypes_extension import ArrowDtype
|
|
85
|
+
from .lib.version import parse as parse_version
|
|
86
|
+
from .typing_ import TileableType, TimeoutType
|
|
87
|
+
|
|
88
|
+
# make flake8 happy by referencing these imports
# (each name is rebound to itself so the re-exported helper counts as "used")
NamedType = NamedType
TypeDispatcher = TypeDispatcher
tokenize = tokenize
register_tokenizer = register_tokenizer
ceildiv = ceildiv
reset_id_random_seed = reset_id_random_seed
new_random_id = new_random_id
get_user_call_point = get_user_call_point
# True when the ``CI`` environment variable is "1" or "true" (case-insensitive);
# an unset or empty variable is treated as "0"
_is_ci = (os.environ.get("CI") or "0").lower() in ("1", "true")
# release component of the installed pandas version
# NOTE(review): ``Tuple[int]`` denotes a 1-tuple; presumably ``.release`` holds
# several ints (major, minor, ...) - confirm against ``lib.version.parse``
pd_release_version: Tuple[int] = parse_version(pd.__version__).release

# module-level logger named after this module
logger = logging.getLogger(__name__)
|
|
101
|
+
|
|
102
|
+
# Bind ``NoDefault`` / ``no_default`` from pandas when available, otherwise
# define a local stand-in, and register the object on ``pandas._libs.lib`` so
# that pickles referring to it by attribute name can be resolved.
try:
    from pandas._libs import lib as _pd__libs_lib
    from pandas._libs.lib import NoDefault, no_default

    # keep the original ``__reduce__`` of NoDefault's type so the replacement
    # below can delegate to it
    _raw__reduce__ = type(NoDefault).__reduce__

    def _no_default__reduce__(self):
        # only the ``NoDefault`` object itself is special-cased; every other
        # object of the same type goes through the original ``__reduce__``
        if self is not NoDefault:
            return _raw__reduce__(self)
        else:  # pragma: no cover
            # reduce to ``getattr(pandas._libs.lib, "NoDefault")`` so the
            # object is looked up by name when unpickled
            return getattr, (_pd__libs_lib, "NoDefault")

    if hasattr(_pd__libs_lib, "_NoDefault"):  # pragma: no cover
        # need to patch __reduce__ to make sure it can be properly unpickled
        type(NoDefault).__reduce__ = _no_default__reduce__
    else:
        # introduced in pandas 1.5.0 : register for pickle compatibility
        _pd__libs_lib._NoDefault = NoDefault
except ImportError:  # pragma: no cover
    # reached when either import above fails: provide a minimal replacement
    # enum with a single ``no_default`` member
    class NoDefault(enum.Enum):
        no_default = "NO_DEFAULT"

        def __repr__(self) -> str:
            return "<no_default>"

    no_default = NoDefault.no_default

    try:
        # register for pickle compatibility
        from pandas._libs import lib as _pd__libs_lib

        _pd__libs_lib.NoDefault = NoDefault
    except (ImportError, AttributeError):
        # best effort: if the module cannot be imported or the attribute
        # cannot be set, continue without registering
        pass
|
|
137
|
+
|
|
138
|
+
# pyarrow is optional: ``pa`` is None when it cannot be imported
try:
    import pyarrow as pa
except ImportError:
    pa = None

# Probe whether the installed pandas exposes ``ArrowDtype``; the flag is True
# when the import fails, i.e. when the dtype is NOT available.
try:
    from pandas import ArrowDtype as PandasArrowDtype  # noqa: F401

    ARROW_DTYPE_NOT_SUPPORTED = False
except ImportError:
    ARROW_DTYPE_NOT_SUPPORTED = True
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class classproperty:
    """
    Read-only descriptor that evaluates a function against the owning class.

    The wrapped callable receives the class (``owner``) instead of an
    instance, so the attribute yields the same kind of result whether it is
    read from the class or from one of its instances.
    """

    def __init__(self, f):
        # the wrapped callable is invoked on every attribute read
        self.f = f

    def __get__(self, obj, owner):
        # ``obj`` (the instance, or None for class-level access) is unused
        getter = self.f
        return getter(owner)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def implements(f: Callable):
    """
    Build a decorator that copies the docstring of ``f`` onto its target.

    Parameters
    ----------
    f : callable whose ``__doc__`` is reused

    Returns
    -------
    decorator
        Function that overwrites ``__doc__`` of the decorated object with
        ``f.__doc__`` and returns that same object.
    """

    def decorator(g):
        # the docstring is read from ``f`` when the decorator is applied
        setattr(g, "__doc__", f.__doc__)
        return g

    return decorator
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class AttributeDict(dict):
    """
    Dictionary whose keys can also be read as attributes.

    ``__getattr__`` is only consulted when normal attribute lookup fails, so
    regular ``dict`` methods and attributes keep precedence over keys with
    the same name.
    """

    def __getattr__(self, item):
        """
        Return ``self[item]``.

        Raises
        ------
        AttributeError
            If ``item`` is not a key of the dictionary.
        """
        try:
            return self[item]
        except KeyError:
            # Mirror the standard message format: quote the attribute name
            # and report the actual (sub)class; hide the internal KeyError.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            ) from None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def on_serialize_shape(shape: Tuple[int]):
    """
    Convert a shape into a form without NaN or numpy scalars.

    Parameters
    ----------
    shape : sequence of dimension sizes; NaN marks a dimension as NaN-valued

    Returns
    -------
    tuple
        NaN entries replaced by ``-1`` and numpy scalars converted via
        ``.item()``. A falsy ``shape`` (``None``, ``()``) is returned as is.
    """
    if not shape:
        return shape

    converted = []
    for dim in shape:
        if np.isnan(dim):
            converted.append(-1)
        elif isinstance(dim, np.generic):
            # unwrap numpy scalar into the matching Python scalar
            converted.append(dim.item())
        else:
            converted.append(dim)
    return tuple(converted)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def on_deserialize_shape(shape: Tuple[int]):
    """
    Turn every ``-1`` entry of a shape into ``np.nan``.

    Parameters
    ----------
    shape : sequence of dimension sizes

    Returns
    -------
    tuple
        Copy of ``shape`` with ``-1`` entries replaced by ``np.nan``. A falsy
        ``shape`` (``None``, ``()``) is returned as is.
    """
    if not shape:
        return shape

    restored = []
    for dim in shape:
        if dim != -1:
            restored.append(dim)
        else:
            restored.append(np.nan)
    return tuple(restored)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def on_serialize_numpy_type(value: np.dtype):
    """
    Replace ``pd.NaT`` and numpy scalars with plain Python values.

    Returns
    -------
    ``None`` when ``value`` is ``pd.NaT``, ``value.item()`` when it is a
    numpy scalar (``np.generic``), otherwise ``value`` unchanged.
    """
    # NOTE(review): the annotation says ``np.dtype`` but the body handles
    # scalar values - confirm the intended parameter type with callers
    if value is pd.NaT:
        return None
    if isinstance(value, np.generic):
        return value.item()
    return value
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def on_serialize_nsplits(value: Tuple[Tuple[int]]):
    """
    Replace missing split sizes with ``None``.

    Parameters
    ----------
    value : iterable of per-dimension split sizes, or None

    Returns
    -------
    tuple of tuples
        Same layout as ``value`` with every entry for which ``pd.isna`` is
        true replaced by ``None``; ``None`` input yields ``None``.
    """
    if value is None:
        return None
    return tuple(
        tuple(None if pd.isna(size) else size for size in per_dim)
        for per_dim in value
    )
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def has_unknown_shape(
    *tiled_tileables: TileableType, axis: Union[None, int, List[int]] = None
) -> bool:
    """
    Tell whether any given object has a null entry in its shape or nsplits.

    Parameters
    ----------
    tiled_tileables : objects exposing ``shape`` and ``nsplits``; objects
        whose ``shape`` is missing or None are ignored
    axis : a single axis, a list of axes, or None to inspect every axis

    Returns
    -------
    bool
        True as soon as ``pd.isnull`` holds for an inspected shape entry or
        split size, False otherwise.
    """
    axes = [axis] if isinstance(axis, int) else axis

    for entity in tiled_tileables:
        if getattr(entity, "shape", None) is None:
            continue

        if axes is None:
            dims = entity.shape
        else:
            # lazy on purpose: axes are only indexed until a null is found
            dims = (entity.shape[ax] for ax in axes)
        for dim in dims:
            if pd.isnull(dim):
                return True

        if axes is None:
            split_groups = entity.nsplits
        else:
            split_groups = (entity.nsplits[ax] for ax in axes)
        for size in itertools.chain(*split_groups):
            if pd.isnull(size):
                return True
    return False
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def calc_nsplits(chunk_idx_to_shape: Dict[Tuple[int], Tuple[int]]) -> Tuple[Tuple[int]]:
    """
    Calculate a tiled entity's nsplits.

    For every dimension, the sizes are taken from the chunks whose index is
    zero on all other dimensions and are ordered by the chunk index along
    that dimension, so the result does not depend on the iteration order of
    the mapping.

    Parameters
    ----------
    chunk_idx_to_shape : Dict type, {chunk_idx: chunk_shape}

    Returns
    -------
    nsplits

    Raises
    ------
    StopIteration
        If ``chunk_idx_to_shape`` is empty (the number of dimensions is read
        from its first key).
    """
    ndim = len(next(iter(chunk_idx_to_shape)))
    tileable_nsplits = []
    for i in range(ndim):
        # (position along dim i, size along dim i) of chunks on the i-th axis
        entries = [
            (index[i], shape[i])
            for index, shape in chunk_idx_to_shape.items()
            if all(idx == 0 for j, idx in enumerate(index) if j != i)
        ]
        # sort on the chunk position only, so sizes (possibly NaN) are never
        # compared; the sort is stable
        entries.sort(key=lambda entry: entry[0])
        tileable_nsplits.append(tuple(size for _, size in entries))
    return tuple(tileable_nsplits)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def copy_tileables(tileables: List[TileableType], **kwargs):
    """
    Copy tileables that are outputs of the same operator.

    Parameters
    ----------
    tileables : list
        Tileables to copy. When more than one is given, all of them must
        share the same operator.
    inputs : optional keyword
        Inputs for the copied operator; the copied operator's own inputs are
        used when this is omitted or empty.
    copy_key : bool, optional keyword, default True
        Whether to carry over the keys of the operator and the tileables.
    copy_id : bool, optional keyword, default True
        Whether to carry over the ids of the tileables.

    Returns
    -------
    The result of ``op.new_tileables`` called on the copied operator.

    Raises
    ------
    TypeError
        On an unknown keyword argument, or when the tileables do not share
        one operator.
    """
    inputs = kwargs.pop("inputs", None)
    copy_key = kwargs.pop("copy_key", True)
    copy_id = kwargs.pop("copy_id", True)
    if kwargs:
        raise TypeError(f"got an unexpected keyword argument '{next(iter(kwargs))}'")
    if len(tileables) > 1:
        # cannot handle tileables with different operators here
        # try to copy separately if so
        if len({t.op for t in tileables}) != 1:
            raise TypeError("All tileables' operators should be same.")

    op = tileables[0].op.copy().reset_key()
    if copy_key:
        op._key = tileables[0].op.key
    kws = []
    for t in tileables:
        params = t.params.copy()
        if copy_key:
            params["_key"] = t.key
        if copy_id:
            params["_id"] = t.id
        # extra params are applied last so they override the entries above
        params.update(t.extra_params)
        kws.append(params)
    inputs = inputs or op.inputs
    return op.new_tileables(inputs, kws=kws, output_limit=len(kws))
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def make_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
    """
    Normalize a dtype-like value.

    ``None``, the string ``"category"`` and anything pandas recognizes as an
    extension array dtype are returned unchanged. The ``pd.Timestamp`` /
    ``datetime.datetime`` classes map to ``datetime64[ns]`` and the
    ``pd.Timedelta`` / ``datetime.timedelta`` classes map to
    ``timedelta64[ns]``. Everything else goes through
    ``pd.api.types.pandas_dtype``; if that raises ``TypeError`` the object
    dtype is returned.
    """
    if dtype is None:
        return None

    is_category_name = isinstance(dtype, str) and dtype == "category"
    if is_category_name or pd.api.types.is_extension_array_dtype(dtype):
        # return string dtype directly as legacy python version
        # does not support ExtensionDtype
        return dtype

    if dtype is pd.Timestamp or dtype is datetime.datetime:
        return np.dtype("datetime64[ns]")
    if dtype is pd.Timedelta or dtype is datetime.timedelta:
        return np.dtype("timedelta64[ns]")

    try:
        resolved = pd.api.types.pandas_dtype(dtype)
    except TypeError:
        resolved = np.dtype("O")
    return resolved
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def make_dtypes(
    dtypes: Union[
        list, dict, str, np.dtype, pd.Series, pd.api.extensions.ExtensionDtype
    ],
    make_series: bool = True,
):
    """
    Normalize one dtype or a collection of dtypes with ``make_dtype``.

    ``None`` and numpy dtypes are returned unchanged. A list or dict has each
    element converted and is wrapped into a ``pd.Series`` unless
    ``make_series`` is False. A ``pd.Series`` is mapped element-wise. Any
    other value is converted as a single dtype.
    """
    if dtypes is None:
        return None
    if isinstance(dtypes, np.dtype):
        return dtypes
    if isinstance(dtypes, pd.Series):
        return dtypes.map(make_dtype)

    if isinstance(dtypes, list):
        converted = [make_dtype(dt) for dt in dtypes]
    elif isinstance(dtypes, dict):
        converted = {name: make_dtype(dt) for name, dt in dtypes.items()}
    else:
        return make_dtype(dtypes)
    return pd.Series(converted) if make_series else converted
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def serialize_serializable(serializable, compress: bool = False):
    """
    Serialize an object into a single bytes payload.

    Layout of the payload: an 8-byte little-endian unsigned length, the
    msgpack-encoded header of that length (with the size of every buffer
    stored under ``header[0]["buf_sizes"]``), then all buffers back to back.
    The payload is passed through ``zlib.compress`` when ``compress`` is True.
    """
    from .serialization import serialize

    def _to_builtin(obj):
        # msgpack fallback: unwrap non-NaN numpy scalars into Python scalars,
        # hand everything else back untouched
        is_np_scalar = isinstance(obj, np.generic) and obj.shape == ()
        if is_np_scalar and not np.isnan(obj):
            return obj.item()
        return obj

    header, buffers = serialize(serializable)
    header[0]["buf_sizes"] = [getattr(buf, "nbytes", len(buf)) for buf in buffers]
    packed_header = msgpack.dumps(header, default=_to_builtin)

    stream = io.BytesIO()
    stream.write(struct.pack("<Q", len(packed_header)))
    stream.write(packed_header)
    for buf in buffers:
        stream.write(buf)
    payload = stream.getvalue()

    return zlib.compress(payload) if compress else payload
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def deserialize_serializable(ser_serializable: bytes):
    """
    Rebuild an object from a payload laid out as an 8-byte length prefix,
    a msgpack header of that length, and the buffers whose sizes are listed
    under ``header[0]["buf_sizes"]``.

    The payload is read as-is; no decompression is performed here.
    """
    from .serialization import deserialize

    bio = io.BytesIO(ser_serializable)
    # The writer in this file packs the header length with "<Q" (explicit
    # little-endian); use the same format so the prefix is decoded
    # identically regardless of the platform's native byte order.
    s_header_length = struct.unpack("<Q", bio.read(8))[0]
    header2 = msgpack.loads(bio.read(s_header_length))
    buffers2 = [bio.read(s) for s in header2[0]["buf_sizes"]]
    return deserialize(header2, buffers2)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def skip_na_call(func: Callable):
    """
    Wrap a single-argument function so that ``None`` is passed through.

    The wrapper returns ``None`` without calling ``func`` when its argument
    is ``None``; otherwise it returns ``func(x)``.
    """

    @functools.wraps(func)
    def new_func(x):
        if x is None:
            return None
        return func(x)

    return new_func
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def url_path_join(*pieces):
    """Join url components into a relative url without doubled slashes.

    Slashes around every component are stripped and empty components are
    dropped before joining. A leading slash on the first component and a
    trailing slash on the last component are kept.
    """
    leading = pieces[0].startswith("/")
    trailing = pieces[-1].endswith("/")

    trimmed = [piece.strip("/") for piece in pieces]
    joined = "/".join(filter(None, trimmed))
    if leading:
        joined = "/" + joined
    if trailing:
        joined += "/"
    # only slashes were given: collapse to a single one
    return "/" if joined == "//" else joined
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def random_ports(port: int, n: int):
    """Yield n candidate ports near the given port.

    The first ``min(5, n)`` ports are ``port, port + 1, ...``. The remaining
    ``n - 5`` ports are ``port`` plus a random offset drawn from
    ``[-2 * n, 2 * n]``, never lower than 1.
    """
    yield from (port + offset for offset in range(min(5, n)))

    spread = 2 * n
    for _ in range(n - 5):
        yield max(1, port + random.randint(-spread, spread))
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def build_temp_table_name(session_id: str, tileable_key: str) -> str:
    """Build the temp table name for a tileable key within a session."""
    table_name = f"tmp_mf_{session_id}_{tileable_key}"
    return table_name
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:
    """Build the intermediate table name: the temp table name plus ``_intermediate``."""
    base_name = build_temp_table_name(session_id, tileable_key)
    return base_name + "_intermediate"
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def build_session_volume_name(session_id: str) -> str:
    """Build the volume name of a session; dashes in the id become underscores."""
    normalized_id = session_id.replace("-", "_")
    return f"mf_vol_{normalized_id}"
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
async def wait_http_response(
    url: str, *, request_timeout: TimeoutType = None, **kwargs
) -> httpclient.HTTPResponse:
    """
    Fetch ``url``, retrying on HTTP timeouts until an overall deadline.

    Parameters
    ----------
    url : str
        Address to fetch.
    request_timeout : optional
        Overall time budget in seconds. ``None`` retries without a deadline
        and passes ``request_timeout=None`` to every fetch.
    **kwargs
        Forwarded to ``AsyncHTTPClient.fetch``.

    Returns
    -------
    The response of the first fetch that does not time out.

    Raises
    ------
    TimeoutError
        When the time budget is exhausted. Errors other than
        ``HTTPTimeoutError`` raised by the fetch propagate unchanged.
    """
    start_time = time.time()
    while True:
        if request_timeout is None:
            timeout_left = None
        else:
            remaining = request_timeout - (time.time() - start_time)
            if remaining <= 0:
                break
            # Poll in slices of at most 10 seconds, but never beyond the
            # overall deadline. The slice must be derived from the time
            # that is left, not from the time already elapsed.
            timeout_left = min(10.0, remaining)
        try:
            return await httpclient.AsyncHTTPClient().fetch(
                url, request_timeout=timeout_left, **kwargs
            )
        except HTTPTimeoutError:
            # single attempt timed out; retry while the budget allows
            pass
    raise TimeoutError
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def get_handler_timeout_value(handler: web.RequestHandler) -> TimeoutType:
    """
    Read the timeout from the ``wait`` and ``timeout`` request arguments.

    Returns 0 when ``wait`` is off, ``None`` when ``wait`` is on and the
    timeout is (almost) zero, and the timeout as a float otherwise.
    """
    wait = bool(int(handler.get_argument("wait", "0")))
    timeout = float(handler.get_argument("timeout", "0"))

    if not wait:
        return 0
    if abs(timeout) < 1e-6:
        return None
    return timeout
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def format_timeout_params(timeout: TimeoutType) -> str:
    """
    Render a timeout as a query string.

    ``None`` gives ``?wait=1``, an (almost) zero timeout gives ``?wait=0``,
    and any other value gives ``?wait=1&timeout=<timeout>``.
    """
    if timeout is None:
        query = "?wait=1"
    elif abs(timeout) < 1e-6:
        query = "?wait=0"
    else:
        query = f"?wait=1&timeout={timeout}"
    return query
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def unwrap_partial_function(func):
    """Strip every ``functools.partial`` layer and return the innermost callable."""
    target = func
    while isinstance(target, functools.partial):
        target = target.func
    return target
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
_PrimitiveType = TypeVar("_PrimitiveType")


def create_sync_primitive(
    cls: Type[_PrimitiveType], loop: asyncio.AbstractEventLoop
) -> _PrimitiveType:
    """
    Create an asyncio sync primitive (locks, events, etc.)
    in a certain event loop.

    Before Python 3.10 the loop is passed to the constructor. From 3.10 on
    the primitive is constructed while ``loop`` is temporarily installed as
    the current event loop; the previous loop (or ``None`` when there was
    none) is restored afterwards.
    """
    # Compare (major, minor) as a tuple: looking at the minor number alone
    # would misjudge any interpreter whose major version is not 3.
    if sys.version_info[:2] < (3, 10):
        return cls(loop=loop)

    # From Python3.10 the loop parameter has been removed. We should work around here.
    try:
        old_loop = asyncio.get_event_loop()
    except RuntimeError:
        old_loop = None
    try:
        asyncio.set_event_loop(loop)
        primitive = cls()
    finally:
        asyncio.set_event_loop(old_loop)
    return primitive
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
class ToThreadCancelledError(asyncio.CancelledError):
    """Cancellation error that also carries a result value.

    Positional arguments are forwarded to ``asyncio.CancelledError``; the
    keyword-only ``result`` is exposed through the read-only ``result``
    property.
    """

    def __init__(self, *args, result=None):
        self._result = result
        super().__init__(*args)

    @property
    def result(self):
        return self._result
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
_ToThreadRetType = TypeVar("_ToThreadRetType")


class ToThreadMixin:
    """
    Mixin that runs blocking callables in a thread pool owned by the instance.

    The pool is created lazily on the first ``to_thread`` call with
    ``_thread_pool_size`` workers and is shut down without waiting when the
    instance is garbage collected.
    """

    # number of worker threads of the lazily created pool
    _thread_pool_size = 1
    # class-wide counter used to give every pool a distinct thread name prefix
    _counter = itertools.count().__next__

    def __del__(self):
        if hasattr(self, "_pool"):
            kw = {"wait": False}
            if sys.version_info[:2] >= (3, 9):
                # the cancel_futures argument is only passed on Python 3.9+
                kw["cancel_futures"] = True
            self._pool.shutdown(**kw)

    async def to_thread(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        timeout: float = None,
        debug_task_name: Optional[str] = None,
        **kwargs,
    ) -> _ToThreadRetType:
        """
        Run ``func(*args, **kwargs)`` in the instance's thread pool.

        Parameters
        ----------
        func : callable
            Function to run. It is executed inside a copy of the current
            ``contextvars`` context.
        wait_on_cancel : bool
            When True the call is shielded; on cancellation or timeout the
            function is awaited to completion and ``ToThreadCancelledError``
            carrying its result is raised.
        timeout : float, optional
            Seconds to wait before ``asyncio.wait_for`` gives up.
        debug_task_name : str, optional
            Name given to the wrapping task when the loop is in debug mode.
            Defaults to ``<module>#<qualname>`` of the unwrapped function.

        Returns
        -------
        The return value of ``func``.
        """
        if not hasattr(self, "_pool"):
            self._pool = concurrent.futures.ThreadPoolExecutor(
                self._thread_pool_size,
                thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
            )

        loop = asyncio.events.get_running_loop()
        ctx = contextvars.copy_context()
        func_call = functools.partial(ctx.run, func, *args, **kwargs)
        fut = loop.run_in_executor(self._pool, func_call)

        if loop.get_debug():
            # create a task and mark its name
            default_task_name = None
            # Building the name is best-effort. Only ordinary errors are
            # swallowed, so KeyboardInterrupt / SystemExit still propagate.
            try:
                unwrapped = unwrap_partial_function(func)
                default_task_name = unwrapped.__qualname__
                if getattr(unwrapped, "__module__", None):
                    default_task_name = unwrapped.__module__ + "#" + default_task_name
            except Exception:  # pragma: no cover
                try:
                    default_task_name = repr(func)
                except Exception:
                    pass
            debug_task_name = debug_task_name or default_task_name

            async def _wait_fut(aio_fut):
                return await aio_fut

            fut = asyncio.create_task(_wait_fut(fut))
            if sys.version_info[:2] == (3, 7):
                # In Python3.7 we should hack the task name to print it in debug logs.
                setattr(fut, "fd_task_name", debug_task_name)
            else:
                fut.set_name(debug_task_name)

        try:
            coro = fut
            if wait_on_cancel:
                coro = asyncio.shield(coro)
            if timeout is not None:
                coro = asyncio.wait_for(coro, timeout)
            return await coro
        except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
            if not wait_on_cancel:
                raise
            # the shielded call is still running: wait for it and hand the
            # result over together with the cancellation
            result = await fut
            raise ToThreadCancelledError(*ex.args, result=result)

    def ensure_async_call(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        **kwargs,
    ) -> Awaitable[_ToThreadRetType]:
        """
        Return an awaitable for calling ``func``.

        Coroutine functions are called directly (``wait_on_cancel`` is not
        used for them); anything else is dispatched through ``to_thread``.
        """
        if asyncio.iscoroutinefunction(func):
            return func(*args, **kwargs)
        return self.to_thread(func, *args, wait_on_cancel=wait_on_cancel, **kwargs)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
class PatchableMixin:
    """Copy the attributes that are set (not None) onto another object."""

    __slots__ = ()

    # names of the attributes considered by patch_to; subclasses override this
    _patchable_attrs = ()

    def patch_to(self, dest_obj) -> None:
        """Set every attribute in ``_patchable_attrs`` whose value is not None on ``dest_obj``."""
        candidates = (
            (name, getattr(self, name, None)) for name in self._patchable_attrs
        )
        for name, value in candidates:
            if value is not None:
                setattr(dest_obj, name, value)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def config_odps_default_options():
    """Replace ``odps.options.sql.settings`` with the default settings below."""
    from odps import options as odps_options

    default_settings = {
        "odps.longtime.instance": "false",
        "odps.sql.session.select.only": "false",
        "metaservice.client.cache.enable": "false",
        "odps.sql.session.result.cache.enable": "false",
        "odps.sql.submit.mode": "script",
        "odps.sql.job.max.time.hours": 72,
    }
    odps_options.sql.settings = default_settings
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def to_hashable(obj: Any) -> Hashable:
    """
    Recursively convert nested iterables into tuples.

    A mapping is rebuilt with its own type, keeping the keys and converting
    the values; note that the result is therefore only hashable if that
    mapping type is. Any other non-string iterable becomes a tuple of
    converted items. Remaining hashable objects are returned unchanged.

    Raises
    ------
    TypeError
        If the object is neither iterable nor hashable.
    """
    if isinstance(obj, Mapping):
        return type(obj)((key, to_hashable(val)) for key, val in obj.items())
    if isinstance(obj, Iterable) and not isinstance(obj, str):
        return tuple(to_hashable(member) for member in obj)
    if isinstance(obj, Hashable):
        return obj
    raise TypeError(type(obj))
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def estimate_pandas_size(
    pd_obj, max_samples: int = 10, min_sample_rows: int = 100
) -> int:
    """
    Estimate the memory size of a pandas object in bytes.

    ``sys.getsizeof`` is used directly when the object has at most
    ``min_sample_rows`` rows, is a ``RangeIndex`` or ``MultiIndex``, or when
    all of its dtypes (data and index) are numeric numpy dtypes or
    ``ArrowDtype``. Otherwise rows are sampled at random and the size of the
    sample is scaled up to the full length.

    Parameters
    ----------
    pd_obj
        DataFrame, Series or Index to measure.
    max_samples : int
        Upper bound of the number of sampled rows. It is capped at
        ``len(pd_obj)`` since sampling is done without replacement.
    min_sample_rows : int
        Objects with at most this many rows are measured exactly.

    Returns
    -------
    int
        Exact or estimated size.
    """
    if len(pd_obj) <= min_sample_rows or isinstance(pd_obj, pd.RangeIndex):
        return sys.getsizeof(pd_obj)
    if isinstance(pd_obj, pd.MultiIndex):
        # MultiIndex's sample size can't be used to estimate
        return sys.getsizeof(pd_obj)

    def _is_fast_dtype(dtype):
        if isinstance(dtype, np.dtype):
            return np.issubdtype(dtype, np.number)
        else:
            return isinstance(dtype, ArrowDtype)

    dtypes = []
    is_series = False
    if isinstance(pd_obj, pd.DataFrame):
        dtypes.extend(pd_obj.dtypes)
        index_obj = pd_obj.index
    elif isinstance(pd_obj, pd.Series):
        dtypes.append(pd_obj.dtype)
        index_obj = pd_obj.index
        is_series = True
    else:
        index_obj = pd_obj

    # handling possible MultiIndex
    if hasattr(index_obj, "dtypes"):
        dtypes.extend(index_obj.dtypes)
    else:
        dtypes.append(index_obj.dtype)

    if all(_is_fast_dtype(dtype) for dtype in dtypes):
        return sys.getsizeof(pd_obj)

    # Sampling without replacement cannot draw more rows than exist, and an
    # empty sample cannot be scaled; measure exactly in the latter case.
    sample_count = min(max_samples, len(pd_obj))
    if sample_count <= 0:
        return sys.getsizeof(pd_obj)

    indices = np.sort(np.random.choice(len(pd_obj), size=sample_count, replace=False))
    iloc = pd_obj if isinstance(pd_obj, pd.Index) else pd_obj.iloc
    if isinstance(index_obj, pd.MultiIndex):
        # MultiIndex's sample size is much greater than expected, thus we calculate
        # the size separately.
        index_size = sys.getsizeof(pd_obj.index)
        if is_series:
            sample_frame_size = iloc[indices].memory_usage(deep=True, index=False)
        else:
            sample_frame_size = iloc[indices].memory_usage(deep=True, index=False).sum()
        return index_size + sample_frame_size * len(pd_obj) // sample_count
    else:
        sample_size = sys.getsizeof(iloc[indices])
        return sample_size * len(pd_obj) // sample_count
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def estimate_table_size(odps_entry, full_table_name: str, partitions: List[str] = None):
    """
    Estimate the size of a table, or of some of its partitions.

    Parameters
    ----------
    odps_entry
        Object providing ``get_table(name)``.
    full_table_name : str
        Name of the table to look up.
    partitions : str or list of str, optional
        Partitions to read. The size of the first partition multiplied by
        the number of partitions is used as the estimate.

    Returns
    -------
    The estimated size, or ``float("inf")`` when it cannot be obtained.
    """
    try:
        data_src = odps_entry.get_table(full_table_name)
        if isinstance(partitions, str):
            partitions = [partitions]
        if not partitions:
            size_mul = 1
        else:
            size_mul = len(partitions)
            data_src = data_src.partitions[partitions[0]]
        return size_mul * data_src.size
    except Exception:
        # Best-effort estimate: any ordinary failure means "unknown, assume
        # huge". Exit requests (KeyboardInterrupt / SystemExit) are not
        # swallowed.
        return float("inf")
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
class ModulePlaceholder:
    """Stand-in for a module that is not installed.

    Calling the placeholder, or reading any attribute that is not found
    through normal lookup, raises ``AttributeError`` naming the module.
    """

    def __init__(self, mod_name: str):
        self._mod_name = mod_name

    def _raises(self):
        message = f"{self._mod_name} is required but not installed."
        raise AttributeError(message)

    def __getattr__(self, key):
        # only reached when normal attribute lookup fails
        self._raises()

    def __call__(self, *_args, **_kwargs):
        self._raises()
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def lazy_import(
    name: str,
    package: str = None,
    globals: Dict = None,  # pylint: disable=redefined-builtin
    locals: Dict = None,  # pylint: disable=redefined-builtin
    rename: str = None,
    placeholder: bool = False,
):
    """
    Return a proxy that imports a module on first attribute access.

    Parameters
    ----------
    name : str
        Module name handed to ``importlib.import_module``.
    package : str, optional
        Anchor package for ``importlib.import_module``.
    globals : dict, optional
        Namespace in which ``rename`` is replaced by the real module once it
        is loaded. Defaults to the globals of the calling frame.
    locals : dict, optional
        Namespace that receives the real module under ``rename`` when
        ``rename`` is not present in ``globals``.
    rename : str, optional
        Name under which the module is stored. Defaults to ``name``.
    placeholder : bool
        Return a ``ModulePlaceholder`` instead of ``None`` when the top-level
        package of ``name`` cannot be found.

    Returns
    -------
    A lazy module proxy, a ``ModulePlaceholder`` or ``None``.

    Raises
    ------
    ImportError
        If looking up the top-level package fails with an error.
    """
    # local import: only the top-level ``importlib`` package is known to be
    # imported by this file
    from importlib import util as importlib_util

    rename = rename or name
    prefix_name = name.split(".", 1)[0]
    globals = globals or inspect.currentframe().f_back.f_globals

    class LazyModule(object):
        def __init__(self):
            self._on_loads = []

        def __getattr__(self, item):
            # these names are probed by test collection and must not trigger
            # the import
            if item.startswith("_pytest") or item in ("__bases__", "__test__"):
                raise AttributeError(item)

            real_mod = importlib.import_module(name, package=package)
            if rename in globals:
                globals[rename] = real_mod
            elif locals is not None:
                locals[rename] = real_mod
            ret = getattr(real_mod, item)
            for on_load_func in self._on_loads:
                on_load_func()
            # make sure on_load hooks only executed once
            self._on_loads = []
            return ret

        def add_load_handler(self, func: Callable):
            self._on_loads.append(func)
            return func

    # ``pkgutil.find_loader`` is deprecated since Python 3.12 in favor of
    # ``importlib.util.find_spec``. The lookup below keeps its behavior:
    # lookup errors surface as ImportError and a spec without a loader
    # counts as "not found".
    try:
        spec = importlib_util.find_spec(prefix_name)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        raise ImportError(
            f"Error while finding loader for {prefix_name!r} ({type(ex)}: {ex})"
        ) from ex

    if spec is not None and spec.loader is not None:
        return LazyModule()
    elif placeholder:
        return ModulePlaceholder(prefix_name)
    else:
        return None
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def sbytes(x: Any) -> bytes:
    """
    Convert a value to bytes through its string form where needed.

    ``None`` and numbers are rendered with ``str`` and encoded as ASCII.
    Lists and tuples are rendered as ``[a, b]`` / ``(a, b)`` using ``str`` of
    each element, strings are encoded as UTF-8. Other objects go through
    ``bytes(x)``, falling back to the UTF-8 encoded ``str(x)`` on TypeError.
    """
    # NB: bytes() in Python 3 has different semantic with Python 2, see: help(bytes)
    from numbers import Number

    if x is None or isinstance(x, Number):
        return str(x).encode("ascii")
    if isinstance(x, (list, tuple)):
        opener, closer = ("[", "]") if isinstance(x, list) else ("(", ")")
        rendered = opener + ", ".join(map(str, x)) + closer
        return rendered.encode("utf-8")
    if isinstance(x, str):
        return x.encode("utf-8")
    try:
        return bytes(x)
    except TypeError:
        return str(x).encode("utf-8")
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def is_full_slice(slc: Any) -> bool:
    """Check if the input is a full slice ((:) or (0:))"""
    if not isinstance(slc, slice):
        return False
    # the start may be either omitted or an explicit zero
    if not (slc.start == 0 or slc.start is None):
        return False
    return slc.stop is None and slc.step is None
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
_enter_counter = 0
|
|
771
|
+
_initial_session = None
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def enter_current_session(func: Callable):
    """
    Decorate ``func(cls, ctx, op)`` to run with the context's session as default.

    On the outermost call the current default session is remembered and the
    session returned by ``ctx.get_current_session()`` is made default. A
    module-level counter tracks nested calls; when it drops back to zero the
    remembered session is restored, or the default is reset if there was
    none. Contexts without ``get_current_session`` bypass all of this.
    """

    @functools.wraps(func)
    def wrapped(cls, ctx, op):
        global _enter_counter, _initial_session

        from .session import AbstractSession, get_default_session

        # skip in some test cases
        if not hasattr(ctx, "get_current_session"):
            return func(cls, ctx, op)

        with AbstractSession._lock:
            is_outermost = _enter_counter == 0
            if is_outermost:
                # to handle nested call, only set initial session
                # in first call
                session = ctx.get_current_session()
                _initial_session = get_default_session()
                session.as_default()
            _enter_counter += 1

        try:
            return func(cls, ctx, op)
        finally:
            with AbstractSession._lock:
                _enter_counter -= 1
                if _enter_counter == 0:
                    # set previous session when counter is 0
                    if _initial_session:
                        _initial_session.as_default()
                    else:
                        AbstractSession.reset_default()

    return wrapped
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
_func_token_cache = weakref.WeakKeyDictionary()
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _get_func_token_values(func):
    """
    Collect the values that identify a callable for tokenizing.

    For an object with ``__code__``: its bytecode (``co_code``) and, when it
    has a closure, the tuple of closure cell contents. ``functools.partial``
    layers contribute their ``args`` and ``keywords`` before the values of
    the wrapped callable. Builtin functions contribute their module and
    qualified name; any other object is used as a value itself.
    """
    tokens = []
    if not hasattr(func, "__code__"):
        # peel partial layers, recording the bound arguments of each
        while isinstance(func, functools.partial):
            tokens.extend([func.args, func.keywords])
            func = func.func
        if not hasattr(func, "__code__"):
            if isinstance(func, types.BuiltinFunctionType):
                tokens.extend([func.__module__, func.__qualname__])
            else:
                tokens.append(func)
            return tokens

    tokens.append(func.__code__.co_code)
    if func.__closure__ is not None:
        tokens.append(tuple(cell.cell_contents for cell in func.__closure__))
    return tokens
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def get_func_token(func):
    """
    Return the token of a callable, cached per function object.

    Tokens are kept in a weak-keyed cache. If a ``TypeError`` is raised on
    the cached path (for instance because the callable cannot be weakly
    referenced), the token is computed without caching.
    """
    try:
        cached = _func_token_cache.get(func)
        if cached is not None:
            return cached
        cached = tokenize(*_get_func_token_values(func))
        _func_token_cache[func] = cached
        return cached
    except TypeError:  # cannot create weak reference to func like 'numpy.ufunc'
        return tokenize(*_get_func_token_values(func))
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
_io_quiet_local = threading.local()
|
|
846
|
+
_io_quiet_lock = threading.Lock()
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
class _QuietIOWrapper:
    """Stream proxy whose ``write`` can be muted per thread.

    Every attribute other than ``write`` and ``wrapped`` is delegated to the
    wrapped stream. While the ``is_wrapped`` flag of the thread-local state
    is set, ``write`` discards the data and returns 0.
    """

    def __init__(self, wrapped):
        self.wrapped = wrapped

    def __getattr__(self, item):
        return getattr(self.wrapped, item)

    def write(self, d):
        muted = getattr(_io_quiet_local, "is_wrapped", False)
        return 0 if muted else self.wrapped.write(d)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
@contextmanager
def quiet_stdio():
    """Quiets standard outputs when inferring types of functions

    On entry ``sys.stdout`` and ``sys.stderr`` are wrapped and the calling
    thread is flagged as quiet. On exit one wrapping layer is removed from
    both streams; the flag is cleared once ``sys.stdout`` is no longer a
    wrapper.
    """
    stream_names = ("stdout", "stderr")

    with _io_quiet_lock:
        _io_quiet_local.is_wrapped = True
        for stream_name in stream_names:
            setattr(sys, stream_name, _QuietIOWrapper(getattr(sys, stream_name)))

    try:
        yield
    finally:
        with _io_quiet_lock:
            for stream_name in stream_names:
                setattr(sys, stream_name, getattr(sys, stream_name).wrapped)
            still_wrapped = isinstance(sys.stdout, _QuietIOWrapper)
            if not still_wrapped:
                _io_quiet_local.is_wrapped = False
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
# from https://github.com/ericvsmith/dataclasses/blob/master/dataclass_tools.py
|
|
881
|
+
# released under Apache License 2.0
|
|
882
|
+
def dataslots(cls):
    """
    Rebuild a dataclass as a new class whose ``__slots__`` are its fields.

    ``__slots__`` cannot be added to an existing class, so a new class with
    the same name, bases and metaclass is created from a copy of the class
    namespace.

    Raises
    ------
    TypeError
        If the class already defines ``__slots__`` itself.
    """
    if "__slots__" in cls.__dict__:  # pragma: no cover
        raise TypeError(f"{cls.__name__} already specifies __slots__")

    slot_names = tuple(field.name for field in dataclasses.fields(cls))

    namespace = dict(cls.__dict__)
    namespace["__slots__"] = slot_names
    # class-level attributes named like a field would clash with the slot
    # descriptors, so drop them from the copied namespace
    for slot_name in slot_names:
        namespace.pop(slot_name, None)
    # the per-instance __dict__ descriptor is not wanted on a slotted class
    namespace.pop("__dict__", None)

    original_qualname = getattr(cls, "__qualname__", None)
    slotted = type(cls)(cls.__name__, cls.__bases__, namespace)
    if original_qualname is not None:
        slotted.__qualname__ = original_qualname
    return slotted
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
def adapt_docstring(doc: str) -> str:
    """
    Adapt numpy-style docstrings to MaxFrame docstring.

    Before the first prompt line (``>>>`` or ``...``) the MaxFrame imports
    are inserted, depending on whether ``np.`` / ``pd.`` occur anywhere in
    the docstring. On prompt lines ``np.`` and ``pd.`` are replaced with
    ``mt.`` and ``md.``. When a non-empty, non-prompt line follows a prompt
    line, ``.execute()`` is appended to that prompt line. The result should
    be checked after replacement.
    """
    if doc is None:
        return None

    import_stmts = []
    if "np." in doc:
        import_stmts.append(">>> import maxframe.tensor as mt")
    if "pd." in doc:
        import_stmts.append(">>> import maxframe.dataframe as md")

    out = []
    imports_pending = True
    after_prompt = False
    for raw in doc.splitlines():
        text = raw.strip()
        if text.startswith((">>>", "...")):
            after_prompt = True
            if imports_pending:
                imports_pending = False
                # reuse the leading blanks / tabs of the first prompt line
                pad = raw[: len(raw) - len(raw.lstrip(" \t"))]
                out.extend(pad + stmt for stmt in import_stmts)
            raw = raw.replace("np.", "mt.").replace("pd.", "md.")
        elif after_prompt:
            after_prompt = False
            if text and out[-1].strip().strip("."):
                # need prev line contains chars other than dots
                out[-1] += ".execute()"
        out.append(raw)
    return "\n".join(out)
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def stringify_path(path: Union[str, os.PathLike]) -> str:
    """
    Convert *path* to a string or unicode path if possible.

    Strings are returned unchanged; other objects are converted through
    their ``__fspath__`` method.

    Raises
    ------
    TypeError
        If the ``__fspath__`` call raises ``AttributeError``.
    """
    if isinstance(path, str):
        return path

    # checking whether path implements the filesystem protocol
    try:
        converted = path.__fspath__()
    except AttributeError:
        raise TypeError("not a path-like object")
    return converted
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
_memory_size_indices = {"": 0, "b": 0, "k": 1, "m": 2, "g": 3, "t": 4}

_size_pattern = re.compile(r"^([0-9.-]+)\s*([a-z]*)$")


def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
    """
    Parse a human-readable size representation into a numeric value.

    Accepted inputs:
    - Raw numbers (e.g., 1024), returned as float
    - Percentages (e.g., "50%"), returned as a fraction
    - Numbers with a unit (e.g., "10KB", "5.5GB", "2MiB")

    Strings are stripped and lower-cased first. The first letter of the
    unit must be one of b, k, m, g, t and may be followed by "b", "i" or
    "ib". Units containing an "i" are powers of 1024, all others are powers
    of 1000.

    Parameters
    ----------
    value : Union[str, int, float]
        The size value to parse, can be a string, int, or float

    Returns
    -------
    Tuple[float, bool]
        A tuple of (parsed_value, is_percentage)
        - parsed_value: The parsed numeric value
        - is_percentage: True if the input was a percentage, False otherwise

    Raises
    ------
    TypeError
        If the value is neither a number nor a string.
    ValueError
        If the string cannot be parsed or uses an unknown unit.
    """
    if isinstance(value, numbers.Number):
        return float(value), False
    if not isinstance(value, str):
        raise TypeError(f"Expected string or number, got {type(value).__name__}")

    text = value.strip().lower()

    # percentages are returned as fractions
    if text.endswith("%"):
        return float(text[:-1]) / 100, True

    parsed = _size_pattern.match(text)
    if parsed is None:
        raise ValueError(f"Cannot parse size value: {text}")

    number = float(parsed.group(1))
    unit = parsed.group(2)
    if not unit:
        # a bare number needs no scaling
        return number, False

    prefix, suffix = unit[0], unit[1:]
    if prefix not in _memory_size_indices:
        valid_prefixes = ", ".join(sorted(_memory_size_indices.keys()))
        raise ValueError(
            f"Unknown unit prefix: '{prefix}', valid prefixes are {valid_prefixes}"
        )
    if len(unit) > 1 and suffix not in ("ib", "b", "i", ""):
        raise ValueError(f"Invalid size unit suffix: {unit}")

    # an "i" anywhere in the unit selects powers of 1024
    base = 1024 if "i" in unit else 1000
    return number * base ** _memory_size_indices[prefix], False
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
def parse_size_to_megabytes(
    value: Union[str, int, float], default_number_unit: str = "GiB"
) -> float:
    """
    Parse a size into a number of megabytes (bytes divided by 1024**2).

    Parameters
    ----------
    value : Union[str, int, float]
        Size to parse. Numbers, and strings that ``float`` accepts, are
        interpreted in ``default_number_unit``.
    default_number_unit : str
        Unit appended to plain numbers before parsing.

    Returns
    -------
    float
        The size in megabytes.

    Raises
    ------
    ValueError
        If a plain number is given without ``default_number_unit``, if the
        value is a percentage, or if it cannot be parsed.
    """
    try:
        value = float(value)
    except (TypeError, ValueError, OverflowError):
        # Not a plain number: leave the value for the unit-aware parser.
        # Only conversion failures are ignored here, so interrupts and exit
        # requests are never swallowed.
        pass

    if isinstance(value, numbers.Number):
        if not default_number_unit:
            raise ValueError(
                "`default_number_unit` must be provided when give a number value"
            )
        return parse_size_to_megabytes(
            f"{value}{default_number_unit}", default_number_unit
        )

    bytes_number, is_percentage = parse_readable_size(value)
    if is_percentage:
        raise ValueError("Percentage size is not supported to parse")

    return bytes_number / (1024**2)  # convert to megabytes
|
|
1056
|
+
|
|
1057
|
+
|
|
1058
|
+
def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
    """
    Remove a suffix from a given string if it exists.

    Parameters
    ----------
    value : str
        The original string.
    suffix : str
        The suffix to be removed. An empty suffix always counts as found.

    Returns
    -------
    Tuple[str, bool]
        The string without the suffix (or the unchanged string) and a flag
        telling whether the suffix was found.
    """
    if len(suffix) == 0:
        return value, True

    # a suffix longer than the value can never match
    found = len(value) >= len(suffix) and value.endswith(suffix)
    trimmed = value[: len(value) - len(suffix)] if found else value
    return trimmed, found
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def find_objects(
    nested: Union[List, Dict],
    types: Union[None, Type, Tuple[Type]] = None,
    checker: Callable[..., bool] = None,
) -> List:
    """
    Collect objects from a nested structure of lists, tuples, sets and dicts.

    Parameters
    ----------
    nested : list or dict
        Structure to walk through. For dicts only the values are visited.
    types : type or tuple of types, optional
        Objects that are instances of these types are collected.
    checker : callable, optional
        Objects for which ``checker(obj)`` is truthy are collected.

    Returns
    -------
    List
        Collected objects in depth-first, left-to-right order. A collected
        object is not descended into, even if it is a container itself.
    """
    matched = []
    pending = [nested]

    def _is_target(obj):
        if types is not None and isinstance(obj, types):
            return True
        return checker is not None and checker(obj)

    while pending:
        current = pending.pop()
        if _is_target(current):
            matched.append(current)
        elif isinstance(current, (list, tuple, set)):
            # push in reverse so that the first child is popped first
            pending.extend(reversed(list(current)))
        elif isinstance(current, dict):
            pending.extend(reversed(list(current.values())))

    return matched
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
def replace_objects(nested: Union[List, Dict], mapping: Mapping) -> Union[List, Dict]:
    """
    Build a copy of a nested container with leaves substituted via ``mapping``.

    Dicts, lists, tuples and sets found inside ``nested`` are processed
    recursively and rebuilt with their own type. Any other item is looked up
    in ``mapping`` and replaced when a match exists. Dict keys are never
    replaced. When ``mapping`` is empty, ``nested`` itself is returned.
    """
    if not mapping:
        return nested

    def _swap(item):
        if isinstance(item, (dict, list, tuple, set)):
            return replace_objects(item, mapping)
        try:
            return mapping.get(item, item)
        except TypeError:
            # the lookup itself failed (e.g. an unhashable item): keep as is
            return item

    container_type = type(nested)
    if isinstance(nested, dict):
        replaced = [_swap(item) for item in list(nested.values())]
        return container_type(zip(nested.keys(), replaced))
    return container_type([_swap(item) for item in list(nested)])
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
def trait_from_env(
    trait_name: str, env: str, trait: Optional[traitlets.TraitType] = None
):
    """
    Create a traitlets default-value method backed by an environment variable.

    Parameters
    ----------
    trait_name : str
        Name of the trait the default is registered for.
    env : str
        Name of the environment variable to read.
    trait : traitlets.TraitType, optional
        The trait object. When omitted, it is taken from the local variables
        of the calling frame under the name ``trait_name``.

    Returns
    -------
    The result of ``traitlets.default(trait_name)`` applied to the generated
    function, which is renamed to ``<trait_name>_default``.
    """
    if trait is None:
        # must stay in this function: index 1 is the direct caller's frame
        caller_locals = inspect.stack()[1].frame.f_locals
        trait = caller_locals[trait_name]

    fallback = trait.default_value
    item_trait: traitlets.TraitType = getattr(trait, "_trait", None)

    def default_value_simple(self):
        raw = os.getenv(env, fallback)
        # only textual values need parsing; anything else is the fallback
        return trait.from_string(raw) if isinstance(raw, (str, bytes)) else raw

    def default_value_list(self):
        raw = os.getenv(env, fallback)
        if raw is None or isinstance(raw, traitlets.Sentinel):
            return raw

        # comma separated items; an empty value yields an empty list
        pieces = raw.split(",") if raw else []
        if not item_trait:
            return pieces
        return [item_trait.from_string(piece) for piece in pieces]

    if not isinstance(trait, traitlets.List):  # pragma: no cover
        default_value_fun = default_value_simple
    else:
        default_value_fun = default_value_list

    default_value_fun.__name__ = trait_name + "_default"
    return traitlets.default(trait_name)(default_value_fun)
|
|
1179
|
+
|
|
1180
|
+
|
|
1181
|
+
def relay_future(
    dest: Union[asyncio.Future, concurrent.futures.Future],
    src: Union[asyncio.Future, concurrent.futures.Future],
) -> None:
    """
    Forward the outcome of ``src`` to ``dest`` once ``src`` finishes.

    Parameters
    ----------
    dest : asyncio.Future or concurrent.futures.Future
        Future receiving the result or the exception of ``src``.
    src : asyncio.Future or concurrent.futures.Future
        Future whose completion is observed through a done-callback.

    Notes
    -----
    Any exception raised while reading the result of ``src`` (including the
    one signalling cancellation) is handed to ``dest.set_exception``.
    If ``dest`` is already done when ``src`` completes, nothing is relayed.
    """

    def cb(fut: Union[asyncio.Future, concurrent.futures.Future]):
        if dest.done():
            # dest was resolved or cancelled meanwhile; setting it again
            # would only raise inside the callback
            return
        try:
            result = fut.result()
        except BaseException as ex:
            dest.set_exception(ex)
        else:
            # kept outside the try so that a failure to deliver the result
            # is not mistaken for a failure of ``src``
            dest.set_result(result)

    src.add_done_callback(cb)
|
|
1192
|
+
|
|
1193
|
+
|
|
1194
|
+
# Maps type names found in arrow type strings to callables creating the
# corresponding DataType. Left empty when pyarrow (``pa``) is unavailable.
_arrow_type_constructors = {}
if pa:
    _arrow_type_constructors = dict(
        [
            ("bool", pa.bool_),
            # the parameter list holds (name, type) pairs; pick the item type
            ("list", lambda x: pa.list_(dict(x)["item"])),
            ("map", lambda x: pa.map_(*x)),
            ("struct", pa.struct),
            ("fixed_size_binary", pa.binary),
            ("halffloat", pa.float16),
            ("float", pa.float32),
            ("double", pa.float64),
            ("decimal", pa.decimal128),
            # repr() of date32 and date64 has `day` or `ms`
            # which is not needed in constructors
            ("date32", lambda *_: pa.date32()),
            ("date64", lambda *_: pa.date64()),
        ]
    )
    # names whose constructor in pyarrow carries exactly the same name
    _plain_arrow_types = """
    null
    int8 int16 int32 int64
    uint8 uint16 uint32 uint64
    float16 float32 float64
    decimal128 decimal256
    string utf8 binary
    time32 time64 duration timestamp
    month_day_nano_interval
    """
    for _type_name in _plain_arrow_types.split():
        # skip names the installed pyarrow does not provide
        if hasattr(pa, _type_name):
            _arrow_type_constructors[_type_name] = getattr(pa, _type_name)
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
def arrow_type_from_str(type_str: str) -> pa.DataType:
    """
    Convert arrow type representations (for inst., list<item: int64>)
    into arrow DataType instances

    The string is split with the Python tokenizer and evaluated with two
    stacks: ``value_stack`` holds names, numbers, parameter lists and
    already built types, ``op_stack`` holds pending ``:`` and opening
    bracket operators.

    Parameters
    ----------
    type_str : str
        Textual representation of the arrow type.

    Returns
    -------
    The object left on top of the value stack after parsing.

    Raises
    ------
    ValueError
        If more than one value remains on the stack once parsing is done.
    """
    # enable consecutive brackets to be tokenized
    type_str = type_str.replace("<", "< ").replace(">", " >")
    token_iter = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
    value_stack, op_stack = [], []

    def _pop_make_type(with_args: bool = False, combined: bool = True):
        """
        Pops tops of value stacks, creates a DataType instance and push back

        If the popped type name is already a DataType, or is not a known
        constructor name, it is pushed back unchanged.

        Parameters
        ----------
        with_args: bool
            if True, the top of the value stack (a parameter list) is popped
            first and used as arguments of the constructor
        combined: bool
            if True, the parameter list is passed to the constructor as one
            single argument; if False, its items are passed as separate
            positional arguments (only meaningful when with_args is True)
        """
        args = () if not with_args else (value_stack.pop(-1),)
        if not combined:
            # unwrap the parameter list so that its items get unpacked
            args = args[0]
        type_name = value_stack.pop(-1)
        if isinstance(type_name, pa.DataType):
            value_stack.append(type_name)
        elif type_name in _arrow_type_constructors:
            value_stack.append(_arrow_type_constructors[type_name](*args))
        else:  # pragma: no cover
            value_stack.append(type_name)

    def _pop_make_struct_field():
        """
        parameterized sub-types need to be represented as tuples

        Drops the pending ``:`` operator and folds the top of the value
        stack into one tuple: the last two values, or the last three when
        the top is a "null" / "not null" marker, which is then replaced by
        a boolean that is True only for "null".
        """
        # value_stack is rebound below, hence the nonlocal declaration
        nonlocal value_stack

        op_stack.pop(-1)
        if isinstance(value_stack[-1], str) and value_stack[-1].lower() in (
            "null",
            "not null",
        ):
            values = value_stack[-3:]
            value_stack = value_stack[:-3]
            values[-1] = values[-1] == "null"
        else:
            values = value_stack[-2:]
            value_stack = value_stack[:-2]
        value_stack.append(tuple(values))

    for token in token_iter:
        if token.type == pytokenize.OP:
            if token.string == ":":
                # remember that a `name: type` pair is being parsed
                op_stack.append(token.string)
            elif token.string == ",":
                # gather previous sub-types
                if op_stack[-1] in ("<", ":"):
                    _pop_make_type()
                if op_stack[-1] == ":":
                    _pop_make_struct_field()

                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
            elif token.string in ("<", "[", "("):
                # pushes an empty parameter list for future use
                value_stack.append([])
                op_stack.append(token.string)
            elif token.string in (")", "]"):
                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # make DataType (i.e., fixed_size_binary / decimal) given args
                _pop_make_type(with_args=True, combined=False)
                op_stack.pop(-1)
            elif token.string == ">":
                # finish the last sub-type before closing the bracket
                _pop_make_type()
                if op_stack[-1] == ":":
                    _pop_make_struct_field()

                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # make DataType (i.e., list / map / struct) given args
                _pop_make_type(with_args=True)
                op_stack.pop(-1)
        elif token.type == pytokenize.NAME:
            if value_stack and value_stack[-1] == "not":
                # merge the name following "not" into one marker, e.g. "not null"
                value_stack[-1] += " " + token.string
            else:
                value_stack.append(token.string)
        elif token.type == pytokenize.NUMBER:
            # numeric parameters are stored as integers
            value_stack.append(int(token.string))
        elif token.type == pytokenize.ENDMARKER:
            # make final type
            _pop_make_type()
    if len(value_stack) > 1:
        raise ValueError(f"Cannot parse type {type_str}")
    return value_stack[-1]
|
|
1328
|
+
|
|
1329
|
+
|
|
1330
|
+
def get_python_tag():
    """Return ``cp`` followed by the major and minor version of the interpreter."""
    # todo add implementation suffix for non-GIL tags when PEP703 is ready
    major, minor = sys.version_info[:2]
    return f"cp{major}{minor}"
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
def get_item_if_scalar(val: Any) -> Any:
    """
    Unwrap zero-dimensional numpy arrays into the scalar they hold.

    Any other value, including arrays with one or more dimensions,
    is returned unchanged.
    """
    is_zero_dim_array = isinstance(val, np.ndarray) and val.ndim == 0
    return val.item() if is_zero_dim_array else val
|
|
1340
|
+
|
|
1341
|
+
|
|
1342
|
+
def collect_leaf_operators(root) -> List[Type]:
    """
    Collect all classes below ``root`` that have no subclasses themselves.

    Parameters
    ----------
    root : type
        Class whose subclass tree is walked. It is returned itself when it
        has no subclasses.

    Returns
    -------
    List[Type]
        Leaf classes in depth-first order. Every class is reported once,
        even when it is reachable through several base classes.
    """
    leaves = []
    # ids are used so that classes need not be hashable
    visited_ids = set()

    def _collect(op_type):
        if id(op_type) in visited_ids:
            # already handled via another base class (multiple inheritance)
            return
        visited_ids.add(id(op_type))

        subclasses = op_type.__subclasses__()
        if not subclasses:
            leaves.append(op_type)
        for subclass in subclasses:
            _collect(subclass)

    _collect(root)
    return leaves
|
|
1353
|
+
|
|
1354
|
+
|
|
1355
|
+
@contextmanager
def sync_pyodps_options():
    """
    Context manager applying selected options of this package to PyODPS.

    Inside a PyODPS option context, ``local_timezone`` is copied from the
    package options, and ``enable_schema`` is copied only when it is truthy
    in the session options.
    """
    from odps.config import option_context as pyodps_option_context

    from .config import options

    with pyodps_option_context() as pyodps_cfg:
        pyodps_cfg.local_timezone = options.local_timezone
        schema_enabled = options.session.enable_schema
        if schema_enabled:
            pyodps_cfg.enable_schema = schema_enabled
        yield
|
|
1366
|
+
|
|
1367
|
+
|
|
1368
|
+
def str_to_bool(s: Optional[str]) -> Optional[bool]:
    """
    Interpret a string as a boolean.

    ``"true"`` and ``"1"`` (case-insensitive, surrounding blanks ignored)
    give True, any other string gives False. Non-string inputs are returned
    unchanged.
    """
    if not isinstance(s, str):
        return s
    normalized = s.lower().strip()
    return normalized == "true" or normalized == "1"
|
|
1370
|
+
|
|
1371
|
+
|
|
1372
|
+
def is_empty(val):
    """
    Tell whether a value is empty.

    pandas DataFrame, Series and Index objects are judged by their ``empty``
    attribute; everything else by its truthiness.
    """
    pandas_types = (pd.DataFrame, pd.Series, pd.Index)
    return val.empty if isinstance(val, pandas_types) else not val
|
|
1376
|
+
|
|
1377
|
+
|
|
1378
|
+
def extract_class_name(cls):
    """Return ``<module>#<qualified name>`` of the given class."""
    return "#".join((cls.__module__, cls.__qualname__))
|
|
1380
|
+
|
|
1381
|
+
|
|
1382
|
+
def flatten(nested_iterable: Union[List, Tuple]) -> List:
    """
    Flatten nested lists and tuples into one flat list.

    Parameters
    ----------
    nested_iterable : list or tuple
        an iterable whose items may be lists or tuples themselves

    Returns
    -------
    flattened : list
        all non-list, non-tuple items in their original order

    Examples
    --------
    >>> flatten([[0, 1], [2, 3]])
    [0, 1, 2, 3]
    >>> flatten([[0, 1], [[3], [4, 5]]])
    [0, 1, 3, 4, 5]
    """
    result = []
    # the stack is kept reversed so that pop() yields items in input order
    pending = list(nested_iterable)
    pending.reverse()
    while pending:
        item = pending.pop()
        if isinstance(item, (tuple, list)):
            pending.extend(reversed(item))
            continue
        result.append(item)
    return result
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
def stack_back(flattened: List, raw: Union[List, Tuple]) -> Union[List, Tuple]:
    """
    Arrange the items of a flat list into the nesting structure of ``raw``.

    Parameters
    ----------
    flattened : list
        flat list supplying the values, consumed from left to right
    raw : list
        nested iterable serving as the structural template

    Returns
    -------
    ret : list
        nested lists mirroring ``raw``; every list or tuple of the template
        is rebuilt as a list

    Examples
    --------
    >>> raw = [[0, 1], [2, [3, 4]]]
    >>> flattened = flatten(raw)
    >>> flattened
    [0, 1, 2, 3, 4]
    >>> a = [f + 1 for f in flattened]
    >>> a
    [1, 2, 3, 4, 5]
    >>> stack_back(a, raw)
    [[1, 2], [3, [4, 5]]]
    """
    remaining = iter(flattened)

    def _rebuild(template):
        # leaves take the next flat value, containers are rebuilt recursively
        return [
            _rebuild(node) if isinstance(node, (list, tuple)) else next(remaining)
            for node in template
        ]

    return _rebuild(raw)
|
|
1456
|
+
|
|
1457
|
+
|
|
1458
|
+
# return type of the function wrapped by call_with_retry
_RetryRetType = TypeVar("_RetryRetType")


def call_with_retry(
    func: Callable[..., _RetryRetType],
    *args,
    retry_times: Optional[int] = None,
    retry_timeout: TimeoutType = None,
    delay: TimeoutType = None,
    reset_func: Optional[Callable] = None,
    exc_type: Union[
        Type[BaseException], Tuple[Type[BaseException], ...]
    ] = BaseException,
    allow_interrupt: bool = True,
    no_raise: bool = False,
    is_func_async: Optional[bool] = None,
    **kwargs,
) -> _RetryRetType:
    """
    Retry calling function given specified times or timeout.

    Parameters
    ----------
    func: Callable
        function to be retried
    args
        arguments to be passed to the function
    retry_times: Optional[int]
        times to retry the function, taken from ``options.retry_times``
        when not specified
    retry_timeout: TimeoutType
        timeout in seconds to retry the function
    delay: TimeoutType
        delay in seconds between every trial, taken from
        ``options.retry_delay`` when not specified
    reset_func: Callable
        Function to call after every failed trial that will be retried.
        In the async case a returned coroutine is awaited.
    exc_type: Type[BaseException] | Tuple[Type[BaseException], ...]
        Exception type for retrial
    allow_interrupt: bool
        If True, KeyboardInterrupt will stop the retry immediately
    no_raise: bool
        If True, no exception will be raised even if all trials failed,
        and the result of ``sys.exc_info()`` is returned instead
    is_func_async: bool
        If True, func will be treated as async. When not specified,
        ``functools.partial`` wrappers are unwrapped and coroutine
        functions are detected automatically.
    kwargs
        keyword arguments to be passed to the function

    Returns
    -------
    Return value of the original function. For async functions a coroutine
    is returned which needs to be awaited.
    """
    if retry_times is None or delay is None:
        # configuration is only consulted when a default is actually needed
        from .config import options

        retry_times = retry_times if retry_times is not None else options.retry_times
        delay = delay if delay is not None else options.retry_delay

    retry_num = 0
    start_time = time.monotonic() if retry_timeout is not None else None

    def is_interrupt(exc: BaseException) -> bool:
        return allow_interrupt and isinstance(exc, KeyboardInterrupt)

    def raise_or_continue(exc: BaseException):
        """Raise (or return exc info) once retries are exhausted, else return None"""
        nonlocal retry_num
        retry_num += 1
        if is_interrupt(exc):
            raise exc from None
        if (retry_times is not None and retry_num > retry_times) or (
            retry_timeout is not None
            and start_time is not None
            and time.monotonic() - start_time > retry_timeout
        ):
            if no_raise:
                return sys.exc_info()
            raise exc from None

    async def async_retry():
        while True:
            try:
                return await func(*args, **kwargs)
            except exc_type as ex:
                if is_interrupt(ex):
                    # stop right away instead of sleeping for `delay` first
                    raise
                await asyncio.sleep(delay)
                res = raise_or_continue(ex)
                if res is not None:
                    return res

                if callable(reset_func):
                    reset_res = reset_func()
                    if asyncio.iscoroutine(reset_res):
                        await reset_res

    def sync_retry():
        while True:
            try:
                return func(*args, **kwargs)
            except exc_type as ex:
                if is_interrupt(ex):
                    # stop right away instead of sleeping for `delay` first
                    raise
                time.sleep(delay)
                res = raise_or_continue(ex)
                if res is not None:
                    return res
                if callable(reset_func):
                    reset_func()

    unwrap_func = func
    if is_func_async is None:
        # unwrap to get true result if func is async
        while isinstance(unwrap_func, functools.partial):
            unwrap_func = unwrap_func.func

    if is_func_async or asyncio.iscoroutinefunction(unwrap_func):
        return async_retry()
    else:
        return sync_retry()
|
|
1566
|
+
|
|
1567
|
+
|
|
1568
|
+
def update_wlm_quota_settings(session_id: str, engine_settings: Dict[str, Any]):
    """
    Align the quota entry of ``engine_settings`` with the session quota.

    Parameters
    ----------
    session_id : str
        Session id, only used in the warning message.
    engine_settings : Dict[str, Any]
        Engine settings, modified in place: the ``odps.task.wlm.quota`` key
        is set to the session quota, or removed when no session quota is
        configured.

    Raises
    ------
    ValueError
        If the settings already carry a non-empty quota which differs from
        the session quota.
    """
    from .config import options

    quota_key = "odps.task.wlm.quota"
    engine_quota = engine_settings.get(quota_key, None)
    # empty quota names are treated as not configured
    session_quota = options.session.quota_name or None

    if engine_quota and engine_quota != session_quota:
        logger.warning(
            "[Session=%s] Session quota (%s) is different to SubDag engine quota (%s)",
            session_id,
            session_quota,
            engine_quota,
        )
        raise ValueError(
            "Quota name cannot be changed after sessions are created, "
            f"session_quota={session_quota}, engine_quota={engine_quota}"
        )

    if session_quota:
        engine_settings[quota_key] = session_quota
    else:
        engine_settings.pop(quota_key, None)
|
|
1589
|
+
|
|
1590
|
+
|
|
1591
|
+
def get_default_table_properties():
    """Return the default table properties as a new dict on every call."""
    return dict(storagestrategy="archive")
|
|
1593
|
+
|
|
1594
|
+
|
|
1595
|
+
def copy_if_possible(obj: Any, deep=False) -> Any:
    """
    Copy an object, falling back to the object itself if copying fails.

    Parameters
    ----------
    obj : Any
        Object to copy.
    deep : bool
        If True, ``copy.deepcopy`` is used, otherwise ``copy.copy``.

    Returns
    -------
    The copy, or ``obj`` itself when copying raised an exception.
    """
    try:
        return copy.deepcopy(obj) if deep else copy.copy(obj)
    except Exception:  # pragma: no cover
        # best effort only; KeyboardInterrupt and SystemExit are not
        # swallowed and still propagate to the caller
        return obj
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
def cache_tileables(*tileables):
    """
    Set ``cache = True`` on every given object that is an ``ENTITY_TYPE``.

    Objects can be passed as separate arguments or as one single list or
    tuple. Objects of other types are ignored.
    """
    from .core import ENTITY_TYPE

    candidates = tileables
    if len(candidates) == 1 and isinstance(candidates[0], (tuple, list)):
        # a single list / tuple argument is the collection itself
        candidates = candidates[0]

    for entity in candidates:
        if not isinstance(entity, ENTITY_TYPE):
            continue
        entity.cache = True
|
|
1610
|
+
|
|
1611
|
+
|
|
1612
|
+
def ignore_warning(func: Callable):
    """
    Decorator running ``func`` with all warnings ignored.

    The warning filters in effect before the call are restored afterwards.
    """

    @functools.wraps(func)
    def inner(*args, **kwargs):
        guard = warnings.catch_warnings()
        with guard:
            warnings.simplefilter("ignore")
            result = func(*args, **kwargs)
        return result

    return inner
|
|
1620
|
+
|
|
1621
|
+
|
|
1622
|
+
class ServiceLoggerAdapter(logging.LoggerAdapter):
    """
    Logger adapter prefixing messages with ``[Key=value ...]`` pairs built
    from the adapter's ``extra`` dict and the keyword arguments of the call.
    """

    # display names for keys; keys without an entry here are capitalized
    extra_key_mapping = {}

    def process(self, msg, kwargs):
        """
        Prepend the key-value prefix to ``msg``.

        Keyword arguments override entries of ``extra`` with the same key.
        ``kwargs`` is returned unchanged.
        """
        fields = (self.extra or {}).copy()
        fields.update(kwargs)

        labels = self.extra_key_mapping
        parts = [
            f"{labels.get(key) or key.capitalize()}={value}"
            for key, value in fields.items()
        ]
        prefix = " ".join(parts)
        return f"[{prefix}] {msg}", kwargs
|
|
1635
|
+
|
|
1636
|
+
|
|
1637
|
+
@contextmanager
def atomic_writer(filename, mode="w", **kwargs):
    """
    Write to a file in an atomic way.

    Content is written into a temporary file created in the directory of
    ``filename``. When the ``with`` body finishes without error, the
    temporary file replaces ``filename`` via ``os.replace``. If the body
    raises, ``filename`` is left untouched. The temporary file is removed
    in either case.

    Parameters
    ----------
    filename : str
        Path of the file to write.
    mode : str
        Mode used to open the temporary file.
    kwargs
        Extra keyword arguments passed to ``open``.

    Yields
    ------
    The opened temporary file object.
    """
    temp_fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename) or ".")
    try:
        # Close the file descriptor immediately and we reopen this later.
        os.close(temp_fd)
        # done inside the try block so that a failure here still removes
        # the temporary file
        os.chmod(temp_path, 0o644)

        # Write to temp file.
        with open(temp_path, mode, **kwargs) as temp_file:
            yield temp_file

        # Replace the original file with the temp file atomically.
        os.replace(temp_path, filename)
    finally:
        # after a successful replace the temp path is gone, which is fine
        try:
            os.remove(temp_path)
        except OSError:
            pass
|
|
1658
|
+
|
|
1659
|
+
|
|
1660
|
+
def prevent_called_from_pandas(level=2):
    """
    Prevent method from being called from pandas

    Parameters
    ----------
    level : int
        Number of frames to go up from this function to reach the frame
        whose source file is checked. With the default of 2 this is the
        caller of the function invoking this check.

    Raises
    ------
    AttributeError
        If the inspected frame belongs to a file inside the pandas package
        directory. The message is the name of the function which invoked
        this check.
    """
    # both lookups must stay in this function, they count frames from here
    frame = sys._getframe(level)
    called_frame = sys._getframe(1)
    # the trailing separator keeps sibling packages whose names merely
    # start with "pandas" from being treated as pandas itself
    pd_pack_prefix = os.path.join(os.path.dirname(pd.__file__), "")
    if frame.f_code.co_filename.startswith(pd_pack_prefix):
        raise AttributeError(called_frame.f_code.co_name)
|
|
1667
|
+
|
|
1668
|
+
|
|
1669
|
+
def combine_error_message_and_traceback(
    messages: List[str], tracebacks: List[List[str]]
) -> str:
    """
    Merge error messages and their traceback lines into one text.

    Every message is followed by a line break and its traceback lines.
    The resulting sections are chained with a ``Caused by:`` line. Messages
    and tracebacks are paired by position.
    """
    sections = [
        "".join([message + "\n"] + tb_lines)
        for message, tb_lines in zip(messages, tracebacks)
    ]
    return "\nCaused by:\n".join(sections)
|
|
1676
|
+
|
|
1677
|
+
|
|
1678
|
+
def generate_unique_id(byte_len: int) -> Generator[str, None, None]:
    """
    Endless generator of random hexadecimal ids.

    The ids are ensured to be unique in one generator.
    DO NOT use this generator in global scope or singleton class members,
    as it may not free the set.
    """
    seen_ids = set()
    while True:
        candidate = new_random_id(byte_len).hex()
        if candidate in seen_ids:
            # collision with an earlier id, draw again
            continue
        seen_ids.add(candidate)
        yield candidate
|
|
1690
|
+
|
|
1691
|
+
|
|
1692
|
+
def validate_and_adjust_resource_ratio(
    expect_resources: Dict[str, Any],
    max_memory_cpu_ratio: float = None,
    adjust: bool = False,
) -> Tuple[Dict[str, Any], bool]:
    """
    Validate and optionally adjust CPU:memory ratio to meet maximum requirements.

    A warning is emitted whenever the memory (GiB) per CPU exceeds the limit.

    Args:
        expect_resources: Dictionary containing resource specifications.
            The ``cpu`` and ``memory`` entries are read. A missing, empty
            or below-one ``cpu`` is treated as 1. Numeric ``memory``
            values are taken as GiB.
        max_memory_cpu_ratio: Maximum memory/cpu ratio. If None, no check
            is performed.
        adjust: Whether to automatically adjust resources to meet ratio

    Returns:
        Tuple of (adjusted_resources, was_adjusted). When an adjustment is
        made, a copy of ``expect_resources`` with raised ``cpu`` is
        returned; in all other cases the input dict itself is returned.
    """
    memory = expect_resources.get("memory")
    if memory is None or max_memory_cpu_ratio is None:
        return expect_resources, False

    cpu = max(expect_resources.get("cpu") or 1, 1)
    # Convert memory to GiB, the helper returns megabytes
    memory_gib = parse_size_to_megabytes(memory, default_number_unit="GiB") / 1024
    current_ratio = memory_gib / cpu

    if current_ratio <= max_memory_cpu_ratio:
        return expect_resources, False

    # Adjust CPU to meet maximum ratio, don't reduce resources
    recommended_cpu = math.ceil(memory_gib / max_memory_cpu_ratio)
    new_ratio = memory_gib / recommended_cpu
    if adjust:
        adjusted_resources = expect_resources.copy()
        adjusted_resources["cpu"] = recommended_cpu

        warnings.warn(
            f"UDF resource auto-adjustment: Current UDF settings"
            f" (CPU: {cpu}, Memory: {memory_gib}Gib, Ratio: {current_ratio:.2f})"
            f" exceed maximum allowed ratio {max_memory_cpu_ratio:.1f}. "
            f"Automatically adjusted to (CPU: {recommended_cpu},"
            f" Memory: {memory_gib:.2f}Gib,"
            f" Ratio: {new_ratio:.2f}:1) to meet requirements."
        )
        return adjusted_resources, True

    warnings.warn(
        f"UDF resource ratio warning: Current UDF settings"
        f" (CPU: {cpu}, Memory: {memory_gib}Gib, Ratio: {current_ratio:.2f})"
        f" exceed maximum allowed ratio {max_memory_cpu_ratio:.1f}. "
        f"Consider adjusting CPU to at least {recommended_cpu}"
        f" (which would result in Ratio: {new_ratio:.2f}) to meet requirements."
    )
    return expect_resources, False
|
|
1746
|
+
|
|
1747
|
+
|
|
1748
|
+
def get_pd_option(option_name, default=no_default):
    """
    Get pandas option. If not exist return `default`.

    FutureWarnings issued while reading the option are suppressed. When the
    option is missing and no ``default`` is given, the original error is
    raised again.
    """
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            value = pd.get_option(option_name)
    except (KeyError, AttributeError):
        if default is not no_default:
            return default
        raise
    return value
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
@contextlib.contextmanager
def pd_option_context(*args):
    """
    Variant of ``pd.option_context`` which skips unavailable options.

    ``args`` is a flat sequence of ``name, value, name, value, ...``.
    Options which cannot be read with ``get_pd_option`` are dropped. When
    no option is left, the body runs without any option context.
    """
    requested = dict(zip(args[0::2], args[1::2]))
    supported_args = []
    for name, value in requested.items():
        try:
            get_pd_option(name)
        except (KeyError, AttributeError):  # pragma: no cover
            continue
        supported_args += [name, value]

    if supported_args:
        with pd.option_context(*supported_args):
            yield
    else:  # pragma: no cover
        yield
|