maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,1282 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import contextvars
|
|
18
|
+
import copy
|
|
19
|
+
import datetime
|
|
20
|
+
import decimal
|
|
21
|
+
import hashlib
|
|
22
|
+
import importlib
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
from collections import OrderedDict
|
|
26
|
+
from functools import partial, wraps
|
|
27
|
+
from typing import Any, Dict, List, Optional, Union
|
|
28
|
+
|
|
29
|
+
import numpy as np
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
from cpython cimport PyObject
|
|
33
|
+
from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t, uintptr_t
|
|
34
|
+
from libcpp.unordered_map cimport unordered_map
|
|
35
|
+
|
|
36
|
+
from pandas.api.extensions import ExtensionDtype
|
|
37
|
+
from pandas.api.types import pandas_dtype
|
|
38
|
+
|
|
39
|
+
from .._utils import NamedType
|
|
40
|
+
|
|
41
|
+
from .._utils cimport TypeDispatcher
|
|
42
|
+
|
|
43
|
+
from ..lib import wrapped_pickle as pickle
|
|
44
|
+
from ..lib.dtypes_extension import ArrowDtype
|
|
45
|
+
from ..utils import NoDefault, arrow_type_from_str, no_default, str_to_bool
|
|
46
|
+
|
|
47
|
+
# resolve pandas pickle compatibility between <1.2 and >=1.3
|
|
48
|
+
try:
|
|
49
|
+
from pandas.core.internals import blocks as pd_blocks
|
|
50
|
+
if not hasattr(pd_blocks, "new_block") and hasattr(pd_blocks, "make_block"):
|
|
51
|
+
# register missing func that would cause errors
|
|
52
|
+
pd_blocks.new_block = pd_blocks.make_block
|
|
53
|
+
except (ImportError, AttributeError):
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
import pyarrow as pa
|
|
58
|
+
except ImportError:
|
|
59
|
+
pa = None
|
|
60
|
+
|
|
61
|
+
try:
|
|
62
|
+
import pytz
|
|
63
|
+
from pytz import BaseTzInfo as PyTZ_BaseTzInfo
|
|
64
|
+
except ImportError:
|
|
65
|
+
PyTZ_BaseTzInfo = type(None)
|
|
66
|
+
try:
|
|
67
|
+
import zoneinfo
|
|
68
|
+
from zoneinfo import ZoneInfo
|
|
69
|
+
except ImportError:
|
|
70
|
+
ZoneInfo = type(None)
|
|
71
|
+
|
|
72
|
+
BUFFER_PICKLE_PROTOCOL = max(pickle.DEFAULT_PROTOCOL, 5)
|
|
73
|
+
cdef bint HAS_PICKLE_BUFFER = pickle.HIGHEST_PROTOCOL >= 5
|
|
74
|
+
cdef bint _PANDAS_HAS_MGR = hasattr(pd.Series([0]), "_mgr")
|
|
75
|
+
cdef bint _ARROW_DTYPE_NOT_SUPPORTED = ArrowDtype is None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
cdef TypeDispatcher _serial_dispatcher = TypeDispatcher()
|
|
79
|
+
cdef dict _deserializers = dict()
|
|
80
|
+
|
|
81
|
+
cdef uint32_t _MAX_STR_PRIMITIVE_LEN = 1024
|
|
82
|
+
# prime modulus for serializer ids
|
|
83
|
+
# use the largest prime number smaller than 32767
|
|
84
|
+
cdef int32_t _SERIALIZER_ID_PRIME = 32749
|
|
85
|
+
|
|
86
|
+
# ids for basic serializers
|
|
87
|
+
cdef:
|
|
88
|
+
int PICKLE_SERIALIZER = 0
|
|
89
|
+
int PRIMITIVE_SERIALIZER = 1
|
|
90
|
+
int BYTES_SERIALIZER = 2
|
|
91
|
+
int STR_SERIALIZER = 3
|
|
92
|
+
int TUPLE_SERIALIZER = 4
|
|
93
|
+
int LIST_SERIALIZER = 5
|
|
94
|
+
int DICT_SERIALIZER = 6
|
|
95
|
+
int PY_DATETIME_SERIALIZER = 7
|
|
96
|
+
int PY_DATE_SERIALIZER = 8
|
|
97
|
+
int PY_TIMEDELTA_SERIALIZER = 9
|
|
98
|
+
int PY_TZINFO_SERIALIZER = 10
|
|
99
|
+
int DTYPE_SERIALIZER = 11
|
|
100
|
+
int COMPLEX_SERIALIZER = 12
|
|
101
|
+
int SLICE_SERIALIZER = 13
|
|
102
|
+
int REGEX_SERIALIZER = 14
|
|
103
|
+
int NO_DEFAULT_SERIALIZER = 15
|
|
104
|
+
int ARROW_BUFFER_SERIALIZER = 16
|
|
105
|
+
int RANGE_SERIALIZER = 17
|
|
106
|
+
int PY_DECIMAL_SERIALIZER = 18
|
|
107
|
+
int PLACEHOLDER_SERIALIZER = 4096
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
cdef dict _type_cache = dict()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
cdef object pickle_serial_hook = contextvars.ContextVar("pickle_serial_hook", default=None)
|
|
114
|
+
cdef object pickle_deserial_hook = contextvars.ContextVar("pickle_deserial_hook", default=None)
|
|
115
|
+
|
|
116
|
+
cdef class PickleHookOptions:
    """
    Context manager that installs pickle (de)serialization hooks.

    On entry, the hooks given to the constructor are stored into the
    module-level ``pickle_serial_hook`` and ``pickle_deserial_hook``
    context variables. On exit, both variables are reset using the
    tokens obtained on entry.

    Parameters
    ----------
    serial_hook: object
        Value put into ``pickle_serial_hook`` while the block is active.
    deserial_hook: object
        Value put into ``pickle_deserial_hook`` while the block is active.
    """
    cdef:
        object _serial_hook
        # token returned by ContextVar.set(), required to undo it on exit
        object _pre_serial_hook
        object _deserial_hook
        # token returned by ContextVar.set(), required to undo it on exit
        object _pre_deserial_hook

    def __init__(self, serial_hook: object = None, deserial_hook: object = None):
        self._serial_hook = serial_hook
        self._deserial_hook = deserial_hook

    def __enter__(self):
        self._pre_serial_hook = pickle_serial_hook.set(self._serial_hook)
        self._pre_deserial_hook = pickle_deserial_hook.set(self._deserial_hook)
        # return the manager itself so ``with ... as opts`` binds a usable
        # object instead of None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # nothing is returned, so exceptions raised in the block propagate
        pickle_serial_hook.reset(self._pre_serial_hook)
        pickle_deserial_hook.reset(self._pre_deserial_hook)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
cdef bint unpickle_allowed
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def reload_unpickle_flag():
    """
    Refresh the module-level ``unpickle_allowed`` flag from the
    ``MAXFRAME_SERIALIZE_UNPICKLE_ALLOWED`` environment variable.

    When the variable is not set, the string "1" is parsed instead.
    """
    global unpickle_allowed
    env_value = os.getenv("MAXFRAME_SERIALIZE_UNPICKLE_ALLOWED", "1")
    unpickle_allowed = str_to_bool(env_value)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
reload_unpickle_flag()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
cdef object _load_by_name(str class_name):
    """
    Resolve ``class_name`` into the object it names and cache the result.

    A name already present in ``_type_cache`` is returned directly.
    Otherwise ``safe_load_by_name`` imported from ``.deserializer`` is
    tried first. When that attempt raises ImportError and unpickling is
    not forbidden, the name is split at its last ``#`` into a module path
    and a dotted attribute path; the module is imported and the
    attributes are looked up one after another.

    Raises
    ------
    ImportError
        Re-raised from the first attempt when
        ``pickle.is_unpickle_forbidden()`` is true, or raised anew when
        the module part of the name cannot be imported.
    """
    if class_name in _type_cache:
        return _type_cache[class_name]

    try:
        from .deserializer import safe_load_by_name

        resolved = safe_load_by_name(class_name)
    except ImportError:
        if pickle.is_unpickle_forbidden():
            raise

        # fall back to a plain import of "<module>#<dotted.attr.path>"
        module_path, attr_path = class_name.rsplit("#", 1)
        try:
            resolved = importlib.import_module(module_path)
        except ImportError as ex:
            raise ImportError(
                f"Failed to import {module_path} when loading "
                f"class {class_name}, {ex}"
            ) from None

        for attr_name in attr_path.split("."):
            resolved = getattr(resolved, attr_name)

    _type_cache[class_name] = resolved
    return resolved
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
cpdef object load_type(str class_name, object parent_class):
    """
    Load the class named ``class_name`` and verify its ancestry.

    Parameters
    ----------
    class_name: str
        Name handed to ``_load_by_name``.
    parent_class: object
        Class (or tuple of classes) the result must be a subclass of.

    Raises
    ------
    ValueError
        When the loaded object is not a type, or is not a subclass of
        ``parent_class``.
    """
    loaded = _load_by_name(class_name)
    if isinstance(loaded, type):
        if issubclass(loaded, parent_class):
            return loaded
        raise ValueError(f"Class {class_name} not a {parent_class}")
    raise ValueError(f"Class {class_name} not a type, cannot be deserialized")
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
cpdef object load_member(str class_name, object restrict_type):
    """
    Load the object named ``class_name`` and verify its type.

    Parameters
    ----------
    class_name: str
        Name handed to ``_load_by_name``.
    restrict_type: object
        Type (or tuple of types) the result must be an instance of.

    Raises
    ------
    ValueError
        When the loaded object is not an instance of ``restrict_type``.
    """
    loaded = _load_by_name(class_name)
    if isinstance(loaded, restrict_type):
        return loaded
    raise ValueError(
        f"Class {class_name} not a {restrict_type}, cannot be deserialized"
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
cpdef void clear_type_cache():
    """
    Drop every entry of ``_type_cache``, the name-to-object cache filled
    by ``_load_by_name``.
    """
    _type_cache.clear()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
cdef Serializer get_deserializer(int32_t deserializer_id):
    """
    Return the entry stored in ``_deserializers`` under ``deserializer_id``.

    The lookup is a plain dict subscript, so an id that was never stored
    raises KeyError.
    """
    cdef Serializer found = _deserializers[deserializer_id]
    return found
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
cdef class Serializer:
    """
    Base class of all serializers.

    Subclasses implement ``serial`` / ``deserial`` and are bound to object
    types through ``register``.
    """
    serializer_id = None
    _public_data_context_key = 0x7fffffff - 1

    def __cinit__(self):
        # copy the class-level id into a C attribute so that C code
        # can read it directly
        self._serializer_id = self.serializer_id

    cpdef bint is_public_data_exist(self, dict context, object key):
        """Tell whether ``key`` is stored in the public data of ``context``."""
        cdef dict shared = context.get(self._public_data_context_key)
        return shared is not None and key in shared

    cpdef put_public_data(self, dict context, object key, object value):
        """Store ``value`` under ``key`` in the public data of ``context``."""
        cdef dict shared = context.get(self._public_data_context_key)
        if shared is None:
            # create the public dict lazily on first write
            shared = {}
            context[self._public_data_context_key] = shared
        shared[key] = value

    cpdef get_public_data(self, dict context, object key):
        """
        Fetch the value stored under ``key`` in the public data of
        ``context``, or None when absent.
        """
        cdef dict shared = context.get(self._public_data_context_key)
        return None if shared is None else shared.get(key)

    cpdef serial(self, object obj, dict context):
        """
        Produce the intermediate serialization result of an object.

        The result is either a Placeholder or a 3-tuple made of
        a header, a group of subcomponents and a finalizing flag.

        * The header is a pickle-serializable tuple
        * The subcomponents are parts or buffers for iterative
          serialization.
        * The flag is a boolean. When true, subcomponents should be
          buffers (for instance, bytes, memory views, GPU buffers,
          etc.) that can be read and written directly. When false,
          subcomponents will be serialized iteratively.

        Parameters
        ----------
        obj: Any
            Object to serialize
        context: Dict
            Serialization context used to create Placeholder objects
            so that duplicated serialization is reduced

        Returns
        -------
        result: Placeholder | Tuple[Tuple, List, bool]
            Intermediate result of serialization
        """
        raise NotImplementedError

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild an object from its serialized header and its
        deserialized subcomponents.

        Parameters
        ----------
        serialized: List
            Serialized object header
        context
            Serialization context for instantiation of Placeholder
            objects
        subs: List
            Deserialized subcomponents

        Returns
        -------
        result: Any
            Deserialized objects
        """
        raise NotImplementedError

    cpdef on_deserial_error(
        self,
        list serialized,
        dict context,
        list subs_serialized,
        int error_index,
        object exc,
    ):
        """
        Give the serializer a chance to rewrite the exception raised
        when deserializing a subcomponent fails.

        Parameters
        ----------
        serialized: List
            Serialized object header
        context
            Serialization context for instantiation of Placeholder
            objects
        subs_serialized: List
            Serialized subcomponents
        error_index: int
            Index of subcomponent causing error
        exc: BaseException
            Exception raised

        Returns
        -------
        exc: BaseException | None
            Rewritten exception. If None, original exception is kept.
        """
        return None

    @classmethod
    def calc_default_serializer_id(cls):
        """
        Derive a serializer id from the MD5 digest of the qualified class
        name, reduced modulo ``_SERIALIZER_ID_PRIME``.
        """
        qualified_name = f"{cls.__module__}.{cls.__qualname__}"
        digest = hashlib.md5(qualified_name.encode()).hexdigest()
        return int(digest, 16) % _SERIALIZER_ID_PRIME

    @classmethod
    def register(cls, obj_type, name=None):
        """
        Register an instance of this serializer for ``obj_type``.

        When ``name`` is given, the registration is made for
        ``NamedType(name, obj_type)`` instead of the bare type.
        """
        if (
            cls.serializer_id is None
            or cls.serializer_id == getattr(super(cls, cls), "serializer_id", None)
        ):
            # every class needs a serializer_id of its own;
            # an id inherited from the parent class is not acceptable
            cls.serializer_id = cls.calc_default_serializer_id()

        inst = cls()
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.register(obj_type, inst)

        existing = _deserializers.get(cls.serializer_id)
        if existing is None:
            _deserializers[cls.serializer_id] = inst
        else:
            # the id is already taken: it must belong to this very class
            assert type(existing) is cls

    @classmethod
    def unregister(cls, obj_type, name=None):
        """
        Remove the registration for ``obj_type`` (or for
        ``NamedType(name, obj_type)``) and drop the deserializer stored
        under this class's ``serializer_id``.
        """
        target = obj_type if name is None else NamedType(name, obj_type)
        _serial_dispatcher.unregister(target)
        _deserializers.pop(cls.serializer_id, None)

    @classmethod
    def dump_handlers(cls):
        """Return whatever ``_serial_dispatcher.dump_handlers()`` returns."""
        return _serial_dispatcher.dump_handlers()

    @classmethod
    def load_handlers(cls, *args):
        """Forward ``args`` to ``_serial_dispatcher.load_handlers``."""
        _serial_dispatcher.load_handlers(*args)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
cdef inline uint64_t _fast_id(PyObject * obj) nogil:
    # the object's address, widened to 64 bits, serves as its identifier
    return <uint64_t><uintptr_t>obj
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def fast_id(obj):
    """C version of id() used for serialization"""
    cdef uint64_t address = _fast_id(<PyObject *>obj)
    return address
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def buffered(func):
    """
    Decorator for serial() methods which avoids serializing the same
    object twice.

    The first time an object is seen it is recorded in ``context`` under
    its id and ``func`` is invoked; later calls with the same object
    return a Placeholder carrying that id instead.
    """
    @wraps(func)
    def wrapped(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)
        if obj_id not in context:
            context[obj_id] = obj
            return func(self, obj, context)
        return Placeholder(obj_id)

    return wrapped
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def pickle_buffers(obj):
    """
    Pickle ``obj`` into a list of buffers.

    The first element of the result is the pickled payload. When
    ``HAS_PICKLE_BUFFER`` is true, out-of-band buffers reported by the
    pickler are appended after it as memoryviews.
    """
    cdef list buffers = [None]

    if not HAS_PICKLE_BUFFER:  # pragma: no cover
        buffers[0] = pickle.dumps(obj)
        return buffers

    def collect_buffer(pickle_buf):
        view = pickle_buf.raw()
        if view.ndim > 1:
            # ravel n-d memoryview
            view = view.cast(view.format)
        buffers.append(memoryview(view))

    buffers[0] = pickle.dumps(
        obj,
        buffer_callback=collect_buffer,
        protocol=BUFFER_PICKLE_PROTOCOL,
    )
    return buffers
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def unpickle_buffers(buffers):
    """
    Rebuild an object from the buffer list produced by pickle_buffers():
    the first buffer is the pickled payload, the rest are passed as
    out-of-band buffers.
    """
    loaded = pickle.loads(buffers[0], buffers=buffers[1:])

    # pandas prior to 1.1.0 keeps its BlockManager in _data rather than
    # _mgr, so objects deserialized from higher versions may be
    # mal-functioned; move the attribute over in that case
    if not _PANDAS_HAS_MGR:  # pragma: no cover
        if hasattr(loaded, "_mgr") and isinstance(loaded, (pd.DataFrame, pd.Series)):
            loaded._data = loaded._mgr
            del loaded._mgr
    return loaded
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
cdef class PickleContainer:
    """
    Holder of already-pickled buffers; the wrapped object is only
    unpickled when ``get()`` is called.
    """
    cdef list buffers

    def __init__(self, list buffers):
        self.buffers = buffers

    cpdef get(self):
        """
        Unpickle and return the wrapped object.

        Raises ValueError when ``unpickle_allowed`` is false.
        """
        if unpickle_allowed:
            return unpickle_buffers(self.buffers)
        raise ValueError("Unpickle not allowed in this environment")

    cpdef list get_buffers(self):
        """Return the underlying buffer list (not a copy)."""
        return self.buffers

    def __copy__(self):
        # shallow copy shares the same buffer list
        return PickleContainer(self.buffers)

    def __deepcopy__(self, memo=None):
        copied_buffers = copy.deepcopy(self.buffers, memo)
        return PickleContainer(copied_buffers)

    def __maxframe_tokenize__(self):
        return self.buffers

    def __reduce__(self):
        return PickleContainer, (self.buffers, )
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
cdef class PickleSerializer(Serializer):
    """Serializer which pickles objects into buffers."""
    serializer_id = PICKLE_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        """
        Pickle ``obj`` into buffers, or return a Placeholder if the object
        is already recorded in ``context``.

        The hook returned by ``pickle_serial_hook.get()``, if any, is
        invoked first. A PickleContainer contributes its stored buffers
        without being pickled again.
        """
        cdef object hook = pickle_serial_hook.get()
        cdef uint64_t obj_id

        if hook is not None:
            hook()

        obj_id = _fast_id(<PyObject*>obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        if type(obj) is PickleContainer:
            buffers = (<PickleContainer>obj).get_buffers()
        else:
            buffers = pickle_buffers(obj)
        return [], buffers, True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Invoke the hook returned by ``pickle_deserial_hook.get()``, if any,
        then delegate to ``deserial_pickle``.
        """
        from .deserializer import deserial_pickle
        cdef object hook = pickle_deserial_hook.get()

        if hook is not None:
            hook()
        return deserial_pickle(serialized, context, subs)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# exact types whose values are kept inline in collection headers
cdef set _primitive_types = {type(None), bool, int, float}
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
cdef class PrimitiveSerializer(Serializer):
    """Serializer that stores the value itself in the header."""
    serializer_id = PRIMITIVE_SERIALIZER

    cpdef serial(self, object obj, dict context):
        """Put ``obj`` in a one-element header with no subcomponents."""
        header = [obj]
        return header, [], True

    cpdef deserial(self, list obj, dict context, list subs):
        """Return the first header element, i.e. the stored value."""
        return obj[0]
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
cdef class BytesSerializer(Serializer):
    """Serializer that passes the object through as a single buffer."""
    serializer_id = BYTES_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        """
        Return ``obj`` as the only buffer, or a Placeholder when the object
        is already recorded in ``context``.
        """
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        return [], [obj], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Return the first subcomponent unchanged."""
        return subs[0]
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
cdef class StrSerializer(Serializer):
    """Serializer that stores strings as one encoded buffer."""
    serializer_id = STR_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        """
        Encode ``obj`` into a bytes buffer, or return a Placeholder when
        the object is already recorded in ``context``.
        """
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        encoded = (<str>obj).encode()
        return [], [encoded], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Decode the first subcomponent back into a string."""
        raw = subs[0]
        # a memoryview has no decode(), turn it into bytes first
        return (raw.tobytes() if type(raw) is memoryview else raw).decode()
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
cdef class CollectionSerializer(Serializer):
    """
    Base serializer of collections: short strings / bytes and primitive
    items stay in the header, every other item becomes a subcomponent.
    """
    obj_type = None

    cdef object _obj_type

    def __cinit__(self):
        # copy the class-level type into a C attribute so that C code
        # can read it directly
        self._obj_type = self.obj_type

    cdef tuple _serial_iterable(self, obj: Any):
        # Split the items of ``obj`` into those kept inline and those to
        # serialize separately. Returns
        # ``([items, indices], separated_items, False)`` where separated
        # items are replaced with None inside ``items``.
        cdef list idx_to_propagate = []
        cdef list obj_to_propagate = []
        cdef list obj_list = <list>obj if type(obj) is list else list(obj)
        cdef int64_t idx
        cdef object item
        cdef object item_type

        for idx in range(len(obj_list)):
            item = obj_list[idx]
            item_type = type(item)

            if (
                # short byte strings and short strings count as primitives
                (item_type is bytes and len(<bytes>item) < _MAX_STR_PRIMITIVE_LEN)
                or (item_type is str and len(<str>item) < _MAX_STR_PRIMITIVE_LEN)
                or item_type in _primitive_types
            ):
                continue

            if obj is obj_list:
                # never modify the caller's list: copy before first write
                obj_list = list(obj)

            obj_list[idx] = None
            idx_to_propagate.append(idx)
            obj_to_propagate.append(item)

        return [obj_list, idx_to_propagate], obj_to_propagate, False

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a collection, or return a Placeholder when the object is
        already recorded in ``context``.
        """
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        return self._serial_iterable(obj)

    cdef list _deserial_iterable(self, list serialized, list subs):
        # Inverse of _serial_iterable: write each deserialized subcomponent
        # back to the index it was taken from and return the item list.
        cdef list res_list, idx_to_propagate
        cdef int64_t i

        res_list, idx_to_propagate = serialized

        for i, pos in enumerate(idx_to_propagate):
            res_list[pos] = subs[i]
        return res_list
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
cdef class TupleSerializer(CollectionSerializer):
    """Serializer of tuples, including tuple subclasses exposing ``_fields``."""
    serializer_id = TUPLE_SERIALIZER
    obj_type = tuple

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a tuple, or return a Placeholder when the object is
        already recorded in ``context``.

        The header gets one extra trailing element: ``module#qualname`` of
        the tuple type when the type has a ``_fields`` attribute, None
        otherwise.
        """
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)
        cdef list header
        cdef object data, is_leaf
        cdef object tuple_cls = type(obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        header, data, is_leaf = self._serial_iterable(obj)
        header.append(
            tuple_cls.__module__ + "#" + tuple_cls.__qualname__
            if hasattr(tuple_cls, "_fields")
            else None
        )
        return header, data, is_leaf

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild the tuple; when a type name was recorded, the type is
        loaded with ``load_type`` and called with the items as positional
        arguments.
        """
        cdef str tuple_type_name = serialized[-1]
        cdef list res = self._deserial_iterable(serialized[:-1], subs)

        for v in res:
            assert type(v) is not Placeholder

        if tuple_type_name is None:
            return tuple(res)
        return load_type(tuple_type_name, tuple)(*res)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
cdef class ListSerializer(CollectionSerializer):
    """Serializer of lists."""
    serializer_id = LIST_SERIALIZER
    obj_type = list

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild the list. Each Placeholder item gets a callback that
        overwrites its slot in the returned list.
        """
        cdef int64_t idx
        cdef list res = self._deserial_iterable(serialized, subs)

        result = list(res)

        for idx in range(len(res)):
            v = res[idx]
            if type(v) is not Placeholder:
                continue
            (<Placeholder>v).callbacks.append(partial(result.__setitem__, idx))
        return result
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _dict_key_replacer(ret, key, real_key):
    """Move the value stored under ``key`` in ``ret`` to ``real_key``."""
    value = ret.pop(key)
    ret[real_key] = value
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _dict_value_replacer(context, ret, key, real_value):
    """
    Store ``real_value`` in ``ret`` under ``key``; a Placeholder key is
    first replaced with the object ``context`` holds for its id.
    """
    real_key = (
        context[(<Placeholder>key).id] if type(key) is Placeholder else key
    )
    ret[real_key] = real_value
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
# marker stored in the header of serialized OrderedDict objects
cdef object _TYPE_CHAR_ORDERED_DICT = "O"
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
cdef class DictSerializer(CollectionSerializer):
    """Serializer of dicts; keys and values are handled as two collections."""
    serializer_id = DICT_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        """
        Serialize a dict.

        An empty plain dict yields an empty header. Otherwise the header is
        ``[keys, values, number of key subcomponents, type marker]`` and
        the subcomponents of keys come before those of values. A
        Placeholder is returned when the object is already recorded in
        ``context``.
        """
        cdef uint64_t obj_id
        cdef list key_obj, value_obj
        cdef list key_bufs, value_bufs

        if type(obj) is dict and len(<dict>obj) == 0:
            return [], [], True

        obj_id = _fast_id(<PyObject*>obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        ser_type = _TYPE_CHAR_ORDERED_DICT if isinstance(obj, OrderedDict) else None

        key_obj, key_bufs, _ = self._serial_iterable(obj.keys())
        value_obj, value_bufs, _ = self._serial_iterable(obj.values())
        return (
            [key_obj, value_obj, len(key_bufs), ser_type],
            key_bufs + value_bufs,
            False,
        )

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild the dict. Placeholder keys and values get callbacks that
        patch the returned dict once the real objects are available.
        """
        cdef int64_t i, num_key_bufs
        cdef list key_subs, value_subs, keys, values

        if not serialized:
            return {}
        if len(serialized) == 1:
            # serialized directly
            return serialized[0]

        key_serialized, value_serialized, num_key_bufs, ser_type = serialized
        key_subs = subs[:num_key_bufs]
        value_subs = subs[num_key_bufs:]

        keys = self._deserial_iterable(<list>key_serialized, key_subs)
        values = self._deserial_iterable(<list>value_serialized, value_subs)

        dict_cls = OrderedDict if ser_type == _TYPE_CHAR_ORDERED_DICT else dict
        ret = dict_cls(zip(keys, values))

        for i in range(len(keys)):
            k = keys[i]
            v = values[i]
            if type(k) is Placeholder:
                (<Placeholder>k).callbacks.append(
                    partial(_dict_key_replacer, ret, k)
                )
            if type(v) is Placeholder:
                (<Placeholder>v).callbacks.append(
                    partial(_dict_value_replacer, context, ret, k)
                )
        return ret
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
cdef class PyDatetimeSerializer(Serializer):
    """Serializer of ``datetime.datetime`` objects."""
    serializer_id = PY_DATETIME_SERIALIZER

    cpdef serial(self, obj: datetime.datetime, dict context):
        """
        Store the POSIX timestamp of ``obj`` and, for aware datetimes, the
        serialized time zone in the header.

        Parameters
        ----------
        obj: datetime.datetime
            Datetime to serialize
        context: Dict
            Serialization context (not used here)

        Returns
        -------
        result: Tuple[List, List, bool]
            ``([timestamp, serialized tz or None], [], True)``
        """
        # pass the datetime itself so that name and UTC offset of generic
        # tzinfo objects are evaluated at the serialized instant instead of
        # at the current time (they may differ, e.g. across DST changes)
        cdef list ser_tz = (
            _serial_tz(obj.tzinfo, obj) if obj.tzinfo is not None else None
        )
        return [obj.timestamp(), ser_tz], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild the datetime from the stored timestamp and, when present,
        the stored time zone.
        """
        cdef object tz = (
            _deserialize_tz(serialized[1]) if serialized[1] is not None else None
        )
        return datetime.datetime.fromtimestamp(serialized[0], tz)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
cdef class PyDateSerializer(Serializer):
    """Serializer of ``datetime.date`` objects, stored as ordinals."""
    serializer_id = PY_DATE_SERIALIZER

    cpdef serial(self, obj: datetime.date, dict context):
        """Store the proleptic Gregorian ordinal of ``obj`` in the header."""
        ordinal = obj.toordinal()
        return [ordinal], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the date from the stored ordinal."""
        ordinal = serialized[0]
        return datetime.date.fromordinal(ordinal)
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
cdef class PyTimedeltaSerializer(Serializer):
    """Serializer of ``datetime.timedelta`` objects."""
    serializer_id = PY_TIMEDELTA_SERIALIZER

    cpdef serial(self, obj: datetime.timedelta, dict context):
        """Store days, seconds and microseconds of ``obj`` in the header."""
        header = [obj.days, obj.seconds, obj.microseconds]
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the timedelta from stored days, seconds and microseconds."""
        # positional order of timedelta is (days, seconds, microseconds)
        return datetime.timedelta(serialized[0], serialized[1], serialized[2])
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
# markers stored as the first element of serialized time zones
cdef object _TYPE_CHAR_TZ_BASE = "S"
cdef object _TYPE_CHAR_TZ_ZONEINFO = "ZI"
cdef object _TYPE_CHAR_TZ_PYTZ = "PT"
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
cdef inline list _serial_tz(
    obj: datetime.tzinfo, dt: Optional[datetime.datetime] = None
):
    # Serialize a tzinfo object into a list whose first element is a
    # type marker:
    #   * pytz zones     -> [marker, zone name]
    #   * ZoneInfo zones -> [marker, key]
    #   * anything else  -> [marker, tz name, UTC offset in whole seconds],
    #     both evaluated at ``dt`` (current time when ``dt`` is not given)
    if isinstance(obj, PyTZ_BaseTzInfo):
        return [_TYPE_CHAR_TZ_PYTZ, obj.zone]
    if isinstance(obj, ZoneInfo):
        return [_TYPE_CHAR_TZ_ZONEINFO, obj.key]

    ref_dt = dt or datetime.datetime.now()
    tz_name = obj.tzname(ref_dt)
    offset_seconds = int(obj.utcoffset(ref_dt).total_seconds())
    return [_TYPE_CHAR_TZ_BASE, tz_name, offset_seconds]
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
cdef inline object _deserialize_tz(list serialized):
    # Inverse of _serial_tz: dispatch on the type marker stored first.
    if serialized[0] == _TYPE_CHAR_TZ_PYTZ:
        return pytz.timezone(serialized[1])
    if serialized[0] == _TYPE_CHAR_TZ_ZONEINFO:
        return zoneinfo.ZoneInfo(serialized[1])

    # fixed-offset zone: [marker, name, offset in seconds]
    if serialized[2] == 0:
        return datetime.timezone.utc
    offset = datetime.timedelta(seconds=serialized[2])
    return datetime.timezone(offset, name=serialized[1])
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
cdef class TZInfoSerializer(Serializer):
    """Serializer of ``datetime.tzinfo`` objects."""
    serializer_id = PY_TZINFO_SERIALIZER

    cpdef serial(self, object obj: datetime.tzinfo, dict context):
        """Put the result of ``_serial_tz(obj)`` in the header."""
        header = _serial_tz(obj)
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the tzinfo object with ``_deserialize_tz``."""
        return _deserialize_tz(serialized)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
# markers stored as the first element of serialized dtypes
cdef object _TYPE_CHAR_DTYPE_NUMPY = "N"
cdef object _TYPE_CHAR_DTYPE_PANDAS_ARROW = "PA"
cdef object _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL = "PC"
cdef object _TYPE_CHAR_DTYPE_PANDAS_INTERVAL = "PI"
cdef object _TYPE_CHAR_DTYPE_PANDAS_EXTENSION = "PE"
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
cdef class DtypeSerializer(Serializer):
    """Serializer of numpy dtypes and pandas extension dtypes."""
    serializer_id = DTYPE_SERIALIZER

    @staticmethod
    def _sort_fields(fields):
        """
        Return the keys of ``fields`` sorted by the second entry of their
        values.

        ``fields`` is the mapping taken from ``np.dtype.fields``, hence the
        argument must not be restricted to ``list``: doing so rejects the
        mapping with a TypeError before the body runs.
        """
        return sorted(fields, key=lambda k: fields[k][1])

    cpdef serial(self, obj: Union[np.dtype, ExtensionDtype], dict context):
        """
        Serialize a dtype; the first header element is a marker telling
        which kind of dtype is stored.

        Raises
        ------
        ImportError
            For extension dtypes when ``_ARROW_DTYPE_NOT_SUPPORTED`` is set
        NotImplementedError
            When ``obj`` is neither a numpy dtype nor an ExtensionDtype
        """
        if isinstance(obj, np.dtype):
            try:
                return [
                    _TYPE_CHAR_DTYPE_NUMPY, np.lib.format.dtype_to_descr(obj), None
                ], [], True
            except ValueError:
                # describe the dtype with its fields re-sorted, and record
                # the original field order so that deserial can restore it
                fields = obj.fields
                new_fields = self._sort_fields(fields)
                desc = np.lib.format.dtype_to_descr(obj[new_fields])
                dtype_new_order = list(fields)
                return [_TYPE_CHAR_DTYPE_NUMPY, desc, dtype_new_order], [], True
        elif isinstance(obj, ExtensionDtype):
            if _ARROW_DTYPE_NOT_SUPPORTED:
                raise ImportError("ArrowDtype is not supported in current environment")
            if isinstance(obj, ArrowDtype):
                return [_TYPE_CHAR_DTYPE_PANDAS_ARROW, str(obj.pyarrow_dtype)], [], True
            elif isinstance(obj, pd.CategoricalDtype):
                # categories are serialized as a subcomponent
                return [
                    _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL, obj.ordered
                ], [obj.categories], False
            elif isinstance(obj, pd.IntervalDtype):
                # the sub-dtype is serialized as a subcomponent
                return [
                    _TYPE_CHAR_DTYPE_PANDAS_INTERVAL, obj.closed
                ], [obj.subdtype], False
            else:
                return [_TYPE_CHAR_DTYPE_PANDAS_EXTENSION, repr(obj)], [], True
        else:
            raise NotImplementedError(f"Does not support serializing dtype {obj!r}")

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild a dtype from the header produced by ``serial``.

        Raises
        ------
        ImportError
            For arrow dtypes when ``_ARROW_DTYPE_NOT_SUPPORTED`` is set
        TypeError
            When a generic extension dtype description cannot be parsed
        NotImplementedError
            When the type marker is unknown
        """
        cdef str ser_type = serialized[0]
        if ser_type == _TYPE_CHAR_DTYPE_NUMPY:
            try:
                dt = np.lib.format.descr_to_dtype(serialized[1])
            except AttributeError:
                dt = np.dtype(serialized[1])

            if serialized[2] is not None:
                # restore the original field order recorded by serial
                dt = dt[serialized[2]]
            return dt
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_ARROW:
            if _ARROW_DTYPE_NOT_SUPPORTED:
                raise ImportError("ArrowDtype is not supported in current environment")
            return ArrowDtype(arrow_type_from_str(serialized[1]))
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL:
            return pd.CategoricalDtype(subs[0], serialized[1])
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_INTERVAL:
            return pd.IntervalDtype(subs[0], serialized[1])
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_EXTENSION:
            if serialized[1] == "StringDtype":  # for legacy pandas version
                return pd.StringDtype()
            try:
                return pandas_dtype(serialized[1])
            except TypeError:
                if serialized[1].endswith("Dtype()"):
                    # retry with the trailing "Dtype()" stripped
                    return pandas_dtype(serialized[1][:-7])
                # do not fall through and silently return None
                raise
        else:
            raise NotImplementedError(f"Unknown serialization type {ser_type}")
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
cdef class ComplexSerializer(Serializer):
    """Serializer of complex numbers."""
    serializer_id = COMPLEX_SERIALIZER

    cpdef serial(self, object obj: complex, dict context):
        """Store the real and imaginary parts of ``obj`` in the header."""
        cdef complex cplx = <complex>obj
        real_part = cplx.real
        imag_part = cplx.imag
        return [real_part, imag_part], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the number from the first two header elements."""
        parts = serialized[:2]
        return complex(*parts)
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
cdef class SliceSerializer(Serializer):
    """Serializer of slice objects."""
    serializer_id = SLICE_SERIALIZER

    cpdef serial(self, object obj: slice, dict context):
        """
        Keep start / stop / step in the header when all of them are ints
        or None; otherwise hand them over as subcomponents.
        """
        cdef list elems = [obj.start, obj.stop, obj.step]

        for part in elems:
            if not (part is None or isinstance(part, int)):
                return [], elems, False
        return elems, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild the slice from the header, or from the subcomponents when
        the header is empty.
        """
        if len(serialized) == 0:
            return slice(subs[0], subs[1], subs[2])
        bounds = serialized[:3]
        return slice(*bounds)
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
cdef class RangeSerializer(Serializer):
    """Serializer of range objects."""
    serializer_id = RANGE_SERIALIZER

    cpdef serial(self, object obj: range, dict context):
        """Store start, stop and step of ``obj`` in the header."""
        header = [obj.start, obj.stop, obj.step]
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the range from the first three header elements."""
        bounds = serialized[:3]
        return range(*bounds)
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
cdef class RegexSerializer(Serializer):
    """Serializer of compiled regular expressions."""
    serializer_id = REGEX_SERIALIZER

    cpdef serial(self, object obj: re.Pattern, dict context):
        """
        Store the flags in the header and the encoded pattern text as the
        only buffer, or return a Placeholder when the object is already
        recorded in ``context``.
        """
        cdef uint64_t obj_id
        obj_id = _fast_id(<PyObject*>obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        return [obj.flags], [(<str>(obj.pattern)).encode()], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Recompile the pattern from the stored buffer and flags.

        The buffer is converted to bytes first when it is a memoryview,
        as StrSerializer.deserial does, because the ``<bytes>`` cast below
        is unchecked.
        """
        pattern_buf = subs[0]
        if type(pattern_buf) is memoryview:
            pattern_buf = pattern_buf.tobytes()
        return re.compile((<bytes>pattern_buf).decode(), serialized[0])
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
cdef class PyDecimalSerializer(Serializer):
    """Serializer of ``decimal.Decimal`` objects, stored as strings."""
    serializer_id = PY_DECIMAL_SERIALIZER

    cpdef serial(self, object obj: decimal.Decimal, dict context):
        """Store ``str(obj)`` in the header."""
        text = str(obj)
        return [text], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Rebuild the decimal from the stored string."""
        text = serialized[0]
        return decimal.Decimal(text)
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
cdef class NoDefaultSerializer(Serializer):
    """Serializer of the ``no_default`` marker; nothing needs to be stored."""
    serializer_id = NO_DEFAULT_SERIALIZER

    cpdef serial(self, object obj, dict context):
        """Return an empty header and no subcomponents."""
        return [], [], True

    cpdef deserial(self, list obj, dict context, list subs):
        """Always return ``no_default``."""
        return no_default
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
cdef class ArrowBufferSerializer(Serializer):
    """Serializer that passes the object through as a single buffer."""
    serializer_id = ARROW_BUFFER_SERIALIZER

    cpdef serial(self, object obj, dict context):
        """Hand ``obj`` over as the only buffer."""
        return [], [obj], True

    cpdef deserial(self, list obj, dict context, list subs):
        """
        Return the first subcomponent, wrapped with ``pa.py_buffer`` unless
        it is already a ``pa.Buffer``.
        """
        buf = subs[0]
        return buf if isinstance(buf, pa.Buffer) else pa.py_buffer(buf)
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
cdef class Placeholder:
    """
    Stand-in for an object that has already been serialized, used to
    reduce duplicated serialization.

    It keeps the identifier of the object together with callbacks
    used to replace itself in parent objects.
    """
    def __init__(self, uint64_t id_):
        self.id = id_
        self.callbacks = []

    def __hash__(self):
        # placeholders hash by the identifier they carry
        return self.id

    def __eq__(self, other):  # pragma: no cover
        return type(other) is Placeholder and self.id == other.id

    def __repr__(self):
        num_callbacks = len(self.callbacks)
        return f"Placeholder(id={self.id}, callbacks=[list of {num_callbacks}])"
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
cdef class PlaceholderSerializer(Serializer):
    """Serializer of Placeholder objects; nothing is stored in the header."""
    serializer_id = PLACEHOLDER_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        """Return an empty header and no subcomponents."""
        return [], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """Return a new Placeholder created with id 0."""
        return Placeholder(0)
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
# Registration of the built-in serializers. Statements and their order
# are kept as-is.

# handler registered for ``object``
PickleSerializer.register(object)

# primitives, byte buffers and strings
for _primitive in _primitive_types:
    PrimitiveSerializer.register(_primitive)
BytesSerializer.register(bytes)
BytesSerializer.register(memoryview)
StrSerializer.register(str)

# collections
ListSerializer.register(list)
TupleSerializer.register(tuple)
DictSerializer.register(dict)

# date and time types
PyDatetimeSerializer.register(datetime.datetime)
PyDateSerializer.register(datetime.date)
PyTimedeltaSerializer.register(datetime.timedelta)
TZInfoSerializer.register(datetime.tzinfo)

# dtypes
DtypeSerializer.register(np.dtype)
DtypeSerializer.register(ExtensionDtype)

# other scalar-like builtins
ComplexSerializer.register(complex)
SliceSerializer.register(slice)
RangeSerializer.register(range)
RegexSerializer.register(re.Pattern)
PyDecimalSerializer.register(decimal.Decimal)
NoDefaultSerializer.register(NoDefault)

# arrow buffers are only registered when ``pa`` is not None
if pa is not None:
    ArrowBufferSerializer.register(pa.Buffer)

PlaceholderSerializer.register(Placeholder)
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
cdef class _SerialStackItem:
    # Entry of the serialization stack: the header of an object, its
    # subcomponents, and the serialized subcomponents collected so far.
    cdef public list serialized
    cdef public list subs
    cdef public list subs_serialized

    def __cinit__(self, list serialized, list subs):
        self.subs_serialized = []
        self.serialized = serialized
        self.subs = subs
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
cdef class _IdContextHolder:
    # ``d`` maps object ids to the ordered ids assigned during
    # serialization; ``obj_count`` is the next ordered id to hand out.
    cdef public unordered_map[uint64_t, uint64_t] d
    cdef public uint64_t obj_count

    def __cinit__(self):
        # ordered ids start from zero
        self.obj_count = 0
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
cdef tuple _serial_single(
    obj, dict context, _IdContextHolder id_context_holder
):
    """Serialize single object and return serialized tuples"""
    cdef uint64_t obj_id, ordered_id
    cdef Serializer serializer
    cdef int serializer_id
    cdef list common_header, serialized, subs

    while True:
        name = context.get("serializer")
        obj_type = type(obj) if name is None else NamedType(name, type(obj))
        serializer = _serial_dispatcher.get_handler(obj_type)
        serializer_id = serializer._serializer_id
        ret_serial = serializer.serial(obj, context)

        if type(ret_serial) is not tuple:
            # the object was converted into another one (usually
            # a Placeholder): serialize that one instead
            obj = ret_serial
            continue

        # the object is serialized: build the common header and return
        serialized, subs, final = <tuple>ret_serial

        if type(obj) is Placeholder:
            # reuse the ordered id recorded for the object stood for
            obj_id = (<Placeholder>obj).id
            ordered_id = id_context_holder.d[obj_id]
        else:
            ordered_id = id_context_holder.obj_count
            id_context_holder.obj_count += 1
            # object ids are only recorded for non-primitive types
            if serializer_id != PRIMITIVE_SERIALIZER:
                obj_id = _fast_id(<PyObject*>obj)
                id_context_holder.d[obj_id] = ordered_id

        # REMEMBER to change _COMMON_HEADER_LEN when content of
        # this header changed
        common_header = [serializer_id, ordered_id, len(subs), final]
        common_header.extend(serialized)
        return common_header, subs, final
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
class _SerializeObjectOverflow(Exception):
    """
    Raised by ``_serialize_with_stack`` when its overflow limit is exceeded.

    The exception carries what is needed to resume: the most recent
    serialized result (``cur_serialized``, which may be None) and the
    value of the serialization counter at the time of the interruption
    (``num_total_serialized``).
    """

    def __init__(self, list cur_serialized, int num_total_serialized):
        super().__init__(cur_serialized)
        self.num_total_serialized = num_total_serialized
        self.cur_serialized = cur_serialized
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
cpdef object _serialize_with_stack(
    list serial_stack,
    list serialized,
    dict context,
    _IdContextHolder id_context_holder,
    list result_bufs_list,
    int64_t num_overflow = 0,
    int64_t num_total_serialized = 0,
):
    """
    Serialize nested objects iteratively, driven by an explicit stack.

    The item on top of ``serial_stack`` is the object currently being
    processed. Its subcomponents are serialized one by one; a
    subcomponent that has subcomponents of its own is pushed onto the
    stack, and a finished item is popped and handed to its parent.

    Parameters
    ----------
    serial_stack: List
        Stack of ``_SerialStackItem`` objects, modified in place
    serialized: List
        Result produced before this call that still has to be attached
        to the item on top of the stack, or None
    context: Dict
        Serialization context
    id_context_holder: _IdContextHolder
        Holder of object id bookkeeping, passed to ``_serial_single``
    result_bufs_list: List
        Receives the subs of subcomponents treated as leaves
    num_overflow: int
        When positive, ``_SerializeObjectOverflow`` is raised as soon as
        the counter exceeds this value
    num_total_serialized: int
        Initial value of the counter; a positive value marks the call
        as a resumed one

    Returns
    -------
    result: Tuple[List, List]
        ``([extra_meta, serialized], result_bufs_list)``
    """
    cdef _SerialStackItem top
    cdef list children
    cdef bint is_final
    cdef int64_t done_count
    cdef bint resumed = num_total_serialized > 0

    while serial_stack:
        top = serial_stack[-1]
        if serialized is not None:
            # a result is pending from the previous step, record it first
            top.subs_serialized.append(serialized)

        done_count = len(top.subs_serialized)
        if done_count != len(top.subs):
            # serialize the next subcomponent of the item at stack top
            serialized, children, is_final = _serial_single(
                top.subs[done_count], context, id_context_holder
            )
            num_total_serialized += 1
            if children and not is_final:
                # the subcomponent has subcomponents of its own: push it
                # and process its children next
                serial_stack.append(_SerialStackItem(serialized, children))
                # its header must not be recorded on the new stack top,
                # which is the subcomponent itself
                serialized = None
            elif children:
                # a leaf that still carries subs: collect them
                result_bufs_list.extend(children)
        else:
            # every subcomponent is serialized, so the current item is
            # done and control moves back to its parent
            serialized = top.serialized + top.subs_serialized
            num_total_serialized += 1
            serial_stack.pop()

        if 0 < num_overflow < num_total_serialized:
            raise _SerializeObjectOverflow(serialized, num_total_serialized)

    # a dict is always kept for extra metas required by other modules;
    # the counter is reported only for resumed calls, which keeps the
    # result smaller otherwise
    extra_meta = {"_N": num_total_serialized} if resumed else {}
    return [extra_meta, serialized], result_bufs_list
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
def serialize(obj, dict context = None):
    """
    Serialize an object and return a header and buffers.
    Buffers are intended for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects. A new dict is used when omitted.

    Returns
    -------
    result: Tuple[List, List]
        ``([meta, header], buffers)``; ``meta["_PUB"]`` holds the value
        stored in ``context`` under
        ``Serializer._public_data_context_key`` (None when absent)
    """
    cdef list pending = []
    cdef list out_buffers = []
    cdef list header
    cdef list children
    cdef bint is_final
    cdef _IdContextHolder id_holder = _IdContextHolder()
    cdef tuple packed

    if context is None:
        context = {}

    header, children, is_final = _serial_single(obj, context, id_holder)
    if children and not is_final:
        # the root has subcomponents: serialize them through the stack
        pending.append(_SerialStackItem(header, children))
        packed = _serialize_with_stack(
            pending, None, context, id_holder, out_buffers
        )
    else:
        # the root is a leaf node, return it directly
        packed = [{}, header], children

    packed[0][0]["_PUB"] = context.get(Serializer._public_data_context_key)
    return packed
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
async def serialize_with_spawn(
    obj, dict context = None, int spawn_threshold = 100, object executor = None
):
    """
    Serialize an object and return a header and buffers.
    Buffers are intended for zero-copy data manipulation.

    Serialization starts in the calling coroutine. Once the
    serialization counter exceeds ``spawn_threshold``, the remaining
    work is handed to ``executor`` through the running event loop.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects. A new dict is used when omitted.
    spawn_threshold: int
        Threshold above which the rest of the serialization is
        spawned into the executor; a non-positive value never spawns
    executor: ThreadPoolExecutor
        Executor to spawn the rest of the serialization into, passed
        as is to ``loop.run_in_executor``

    Returns
    -------
    result: Tuple[List, List]
        ``([meta, header], buffers)``; ``meta["_PUB"]`` holds the value
        stored in ``context`` under
        ``Serializer._public_data_context_key`` (None when absent)
    """
    cdef list pending = []
    cdef list out_buffers = []
    cdef list header
    cdef list children
    cdef bint is_final
    cdef _IdContextHolder id_holder = _IdContextHolder()
    cdef tuple packed

    if context is None:
        context = {}

    header, children, is_final = _serial_single(obj, context, id_holder)
    if children and not is_final:
        pending.append(_SerialStackItem(header, children))

        try:
            # first attempt runs inline and is bounded by spawn_threshold
            packed = _serialize_with_stack(
                pending, None, context, id_holder, out_buffers, spawn_threshold
            )
        except _SerializeObjectOverflow as overflow:
            # too much work for the event loop: resume in the executor
            # from the saved state, this time without an overflow limit
            loop = asyncio.get_running_loop()
            packed = await loop.run_in_executor(
                executor,
                _serialize_with_stack,
                pending,
                overflow.cur_serialized,
                context,
                id_holder,
                out_buffers,
                0,
                overflow.num_total_serialized,
            )
    else:
        # the root is a leaf node, return it directly
        packed = [{}, header], children

    packed[0][0]["_PUB"] = context.get(Serializer._public_data_context_key)
    return packed
|
|
1253
|
+
|
|
1254
|
+
|
|
1255
|
+
# Implementation backing deserialize(); that function binds it on first
# use, when it finds this variable still set to None.
cdef object deserialize_impl
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
def deserialize(list serialized, list buffers, dict context = None):
    """
    Deserialize an object from its serialized header and buffers.

    The work is delegated to ``deserialize`` from the sibling
    ``deserializer`` module, which is imported on the first call and
    then kept in the module-level ``deserialize_impl``.

    Parameters
    ----------
    serialized: List
        Serialized object header
    buffers: List
        List of buffers extracted from serialize() calls
    context: Dict
        Serialization context for replacing Placeholder
        objects

    Returns
    -------
    result: Any
        Deserialized object
    """
    global deserialize_impl

    if deserialize_impl is None:
        # imported lazily rather than at module load
        # NOTE(review): presumably this avoids an import cycle - confirm
        from .deserializer import deserialize as loaded_impl
        deserialize_impl = loaded_impl

    return deserialize_impl(serialized, buffers, context)
|