maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict, Optional, Union
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...serialization.serializables import AnyField, BoolField, DictField, StringField
|
|
19
|
+
from ..utils import parse_index
|
|
20
|
+
from .core import LakeDataStore
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DataFrameToJSON(LakeDataStore):
|
|
24
|
+
_op_type_ = opcodes.TO_JSON
|
|
25
|
+
|
|
26
|
+
orient = StringField("orient", default=None)
|
|
27
|
+
default_handler = AnyField("default_handler", default=None)
|
|
28
|
+
lines = BoolField("lines", default=None)
|
|
29
|
+
index = BoolField("index", default=None)
|
|
30
|
+
json_kwargs = DictField("json_kwargs", default=None)
|
|
31
|
+
|
|
32
|
+
def __init__(self, output_types=None, **kw):
|
|
33
|
+
super().__init__(_output_types=output_types, **kw)
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def one_file(self):
|
|
37
|
+
# if wildcard in path, write json into multiple files
|
|
38
|
+
return "*" not in self.path and not self.partition_cols
|
|
39
|
+
|
|
40
|
+
def __call__(self, df):
|
|
41
|
+
if self.one_file and (self.orient != "records" or not self.lines):
|
|
42
|
+
raise ValueError(
|
|
43
|
+
"You can only fix one single output file when orient='records' and lines is True"
|
|
44
|
+
)
|
|
45
|
+
index_value = parse_index(df.index_value.to_pandas()[:0], df)
|
|
46
|
+
if df.ndim == 2:
|
|
47
|
+
columns_value = parse_index(
|
|
48
|
+
df.columns_value.to_pandas()[:0], store_data=True
|
|
49
|
+
)
|
|
50
|
+
return self.new_dataframe(
|
|
51
|
+
[df],
|
|
52
|
+
shape=(0, 0),
|
|
53
|
+
dtypes=df.dtypes[:0],
|
|
54
|
+
index_value=index_value,
|
|
55
|
+
columns_value=columns_value,
|
|
56
|
+
)
|
|
57
|
+
else:
|
|
58
|
+
return self.new_series(
|
|
59
|
+
[df], shape=(0,), dtype=df.dtype, index_value=index_value
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def to_json(
|
|
64
|
+
df,
|
|
65
|
+
path: Optional[str] = None,
|
|
66
|
+
orient: Optional[str] = None,
|
|
67
|
+
date_format: Optional[str] = None,
|
|
68
|
+
double_precision: int = 10,
|
|
69
|
+
force_ascii: bool = True,
|
|
70
|
+
date_unit: Optional[str] = "ms",
|
|
71
|
+
default_handler: Optional[callable] = None,
|
|
72
|
+
lines: bool = False,
|
|
73
|
+
compression: Union[str, Dict[str, Any], None] = "infer",
|
|
74
|
+
index: Optional[bool] = None,
|
|
75
|
+
indent: Optional[int] = None,
|
|
76
|
+
storage_options: Optional[Dict[str, Any]] = None,
|
|
77
|
+
partition_cols: Optional[Union[str, list]] = None,
|
|
78
|
+
**kwargs,
|
|
79
|
+
):
|
|
80
|
+
r"""
|
|
81
|
+
Convert the object to a JSON string.
|
|
82
|
+
|
|
83
|
+
Note NaN's and None will be converted to null and datetime objects
|
|
84
|
+
will be converted to UNIX timestamps.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
path : str, path object, file-like object, or None, default None
|
|
89
|
+
String, path object (implementing os.PathLike[str]), or file-like
|
|
90
|
+
object implementing a write() function. If None, the result is
|
|
91
|
+
returned as a string.
|
|
92
|
+
orient : str
|
|
93
|
+
Indication of expected JSON string format.
|
|
94
|
+
|
|
95
|
+
* Series:
|
|
96
|
+
|
|
97
|
+
- default is 'index'
|
|
98
|
+
- allowed values are: {'split', 'records', 'index', 'table'}.
|
|
99
|
+
|
|
100
|
+
* DataFrame:
|
|
101
|
+
|
|
102
|
+
- default is 'columns'
|
|
103
|
+
- allowed values are: {'split', 'records', 'index', 'columns',
|
|
104
|
+
'values', 'table'}.
|
|
105
|
+
|
|
106
|
+
* The format of the JSON string:
|
|
107
|
+
|
|
108
|
+
- 'split' : dict like {'index' -> [index], 'columns' -> [columns],
|
|
109
|
+
'data' -> [values]}
|
|
110
|
+
- 'records' : list like [{column -> value}, ... , {column -> value}]
|
|
111
|
+
- 'index' : dict like {index -> {column -> value}}
|
|
112
|
+
- 'columns' : dict like {column -> {index -> value}}
|
|
113
|
+
- 'values' : just the values array
|
|
114
|
+
- 'table' : dict like {'schema': {schema}, 'data': {data}}
|
|
115
|
+
|
|
116
|
+
Describing the data, where data component is like ``orient='records'``.
|
|
117
|
+
|
|
118
|
+
date_format : {None, 'epoch', 'iso'}
|
|
119
|
+
Type of date conversion. 'epoch' = epoch milliseconds,
|
|
120
|
+
'iso' = ISO8601. The default depends on the `orient`. For
|
|
121
|
+
``orient='table'``, the default is 'iso'. For all other orients,
|
|
122
|
+
the default is 'epoch'.
|
|
123
|
+
double_precision : int, default 10
|
|
124
|
+
The number of decimal places to use when encoding
|
|
125
|
+
floating point numbers.
|
|
126
|
+
force_ascii : bool, default True
|
|
127
|
+
Force encoded string to be ASCII.
|
|
128
|
+
date_unit : str, default 'ms' (milliseconds)
|
|
129
|
+
The time unit to encode to, governs timestamp and ISO8601
|
|
130
|
+
precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
|
|
131
|
+
microsecond, and nanosecond respectively.
|
|
132
|
+
default_handler : callable, default None
|
|
133
|
+
Handler to call if object cannot otherwise be converted to a
|
|
134
|
+
suitable format for JSON. Should receive a single argument which is
|
|
135
|
+
the object to convert and return a serializable object.
|
|
136
|
+
lines : bool, default False
|
|
137
|
+
If 'orient' is 'records' write out line-delimited json format. Will
|
|
138
|
+
throw ValueError if incorrect 'orient' is used.
|
|
139
|
+
compression : str or dict, default 'infer'
|
|
140
|
+
For on-the-fly compression of the output data. If str, represents
|
|
141
|
+
compression mode. If dict, value at 'method' is the compression mode.
|
|
142
|
+
Compression mode may be any of the following possible
|
|
143
|
+
values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If compression
|
|
144
|
+
mode is 'infer' and `path_or_buf` is path-like, then detect
|
|
145
|
+
compression mode from the following extensions: '.gz', '.bz2',
|
|
146
|
+
'.zip' or '.xz'. (otherwise no compression). If dict given and
|
|
147
|
+
mode is one of {'zip', 'xz'}, other entries passed as
|
|
148
|
+
additional compression options.
|
|
149
|
+
index : bool, default None
|
|
150
|
+
Whether to include the index values in the JSON string. Not
|
|
151
|
+
including the index (``index=False``) is only supported when
|
|
152
|
+
orient is 'split' or 'table'.
|
|
153
|
+
indent : int, optional
|
|
154
|
+
Length of whitespace used to indent each record.
|
|
155
|
+
partition_cols : list, optional, default None
|
|
156
|
+
Column names by which to partition the dataset.
|
|
157
|
+
Columns are partitioned in the order they are given.
|
|
158
|
+
|
|
159
|
+
See Also
|
|
160
|
+
--------
|
|
161
|
+
read_json : Convert a JSON string to pandas object.
|
|
162
|
+
|
|
163
|
+
Notes
|
|
164
|
+
-----
|
|
165
|
+
The behavior of ``indent=0`` varies from the stdlib, which does not
|
|
166
|
+
indent the output but does insert newlines. Currently, ``indent=0``
|
|
167
|
+
and the default ``indent=None`` are equivalent in pandas, though this
|
|
168
|
+
may change in a future release.
|
|
169
|
+
|
|
170
|
+
``orient='table'`` contains a 'pandas_version' field under 'schema'.
|
|
171
|
+
This stores the version of `pandas` used in the latest revision of the
|
|
172
|
+
schema.
|
|
173
|
+
|
|
174
|
+
Examples
|
|
175
|
+
--------
|
|
176
|
+
>>> import maxframe.dataframe as md
|
|
177
|
+
>>> df = md.DataFrame([['a', 'b'], ['c', 'd']],
|
|
178
|
+
... index=['row 1', 'row 2'],
|
|
179
|
+
... columns=['col 1', 'col 2'])
|
|
180
|
+
>>> df.to_json('data.json')
|
|
181
|
+
>>> # Writing to a file with orient='records'
|
|
182
|
+
>>> df.to_json('records.json', orient='records') # doctest: +SKIP
|
|
183
|
+
>>> # Writing in line-delimited json format
|
|
184
|
+
>>> df.to_json('ldjson.json', orient='records', lines=True) # doctest: +SKIP
|
|
185
|
+
>>> # Write partitioned dataset
|
|
186
|
+
>>> df.to_json('dataset', partition_cols=['col 1']) # doctest: +SKIP
|
|
187
|
+
"""
|
|
188
|
+
|
|
189
|
+
if kwargs:
|
|
190
|
+
raise TypeError(
|
|
191
|
+
f"to_json() got an unexpected keyword argument '{next(iter(kwargs))}'"
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
if path is None:
|
|
195
|
+
raise NotImplementedError("Currently only support to_json with path specified")
|
|
196
|
+
|
|
197
|
+
json_kwargs = dict(
|
|
198
|
+
date_format=date_format,
|
|
199
|
+
double_precision=double_precision,
|
|
200
|
+
force_ascii=force_ascii,
|
|
201
|
+
date_unit=date_unit,
|
|
202
|
+
indent=indent,
|
|
203
|
+
)
|
|
204
|
+
op = DataFrameToJSON(
|
|
205
|
+
path=path,
|
|
206
|
+
orient=orient,
|
|
207
|
+
default_handler=default_handler,
|
|
208
|
+
lines=lines,
|
|
209
|
+
compression=compression,
|
|
210
|
+
index=index,
|
|
211
|
+
storage_options=storage_options,
|
|
212
|
+
partition_cols=partition_cols,
|
|
213
|
+
json_kwargs=json_kwargs,
|
|
214
|
+
)
|
|
215
|
+
return op(df)
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import itertools
|
|
16
|
+
import logging
|
|
17
|
+
from collections import OrderedDict
|
|
18
|
+
from typing import Any, Dict, List, Optional, Union
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
from odps import ODPS
|
|
22
|
+
from odps.models import Table as ODPSTable
|
|
23
|
+
from odps.types import OdpsSchema, PartitionSpec
|
|
24
|
+
|
|
25
|
+
from ... import opcodes
|
|
26
|
+
from ...config import options
|
|
27
|
+
from ...core import OutputType
|
|
28
|
+
from ...io.odpsio import build_dataframe_table_meta
|
|
29
|
+
from ...io.odpsio.schema import odps_schema_to_arrow_schema
|
|
30
|
+
from ...lib.dtypes_extension import ArrowDtype
|
|
31
|
+
from ...serialization.serializables import (
|
|
32
|
+
BoolField,
|
|
33
|
+
DictField,
|
|
34
|
+
FieldTypes,
|
|
35
|
+
Int64Field,
|
|
36
|
+
ListField,
|
|
37
|
+
SeriesField,
|
|
38
|
+
StringField,
|
|
39
|
+
)
|
|
40
|
+
from ...typing_ import TileableType
|
|
41
|
+
from ..core import DataFrame # noqa: F401
|
|
42
|
+
from ..utils import parse_index
|
|
43
|
+
from .core import DataFrameDataStore
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DataFrameToODPSTable(DataFrameDataStore):
|
|
49
|
+
_op_type_ = opcodes.TO_ODPS_TABLE
|
|
50
|
+
|
|
51
|
+
dtypes = SeriesField("dtypes")
|
|
52
|
+
|
|
53
|
+
table_name = StringField("table_name", default=None)
|
|
54
|
+
partition_spec = StringField("partition_spec", default=None)
|
|
55
|
+
partition_columns = ListField("partition_columns", FieldTypes.string, default=None)
|
|
56
|
+
overwrite = BoolField("overwrite", default=None)
|
|
57
|
+
write_batch_size = Int64Field("write_batch_size", default=None)
|
|
58
|
+
unknown_as_string = BoolField("unknown_as_string", default=None)
|
|
59
|
+
index = BoolField("index", default=True)
|
|
60
|
+
index_label = ListField("index_label", FieldTypes.string, default=None)
|
|
61
|
+
lifecycle = Int64Field("lifecycle", default=None)
|
|
62
|
+
table_properties = DictField("table_properties", default=None)
|
|
63
|
+
primary_key = ListField("primary_key", FieldTypes.string, default=None)
|
|
64
|
+
use_generated_table_meta = BoolField("use_generated_table_meta", default=False)
|
|
65
|
+
|
|
66
|
+
def __init__(self, **kw):
|
|
67
|
+
super().__init__(_output_types=[OutputType.dataframe], **kw)
|
|
68
|
+
|
|
69
|
+
def check_inputs(self, inputs: List[TileableType]):
|
|
70
|
+
if self.use_generated_table_meta:
|
|
71
|
+
return None
|
|
72
|
+
return super().check_inputs(inputs)
|
|
73
|
+
|
|
74
|
+
def __call__(self, x):
|
|
75
|
+
shape = (0,) * len(x.shape)
|
|
76
|
+
index_value = parse_index(x.index_value.to_pandas()[:0], x.key, "index")
|
|
77
|
+
columns_value = parse_index(
|
|
78
|
+
x.columns_value.to_pandas()[:0], x.key, "columns", store_data=True
|
|
79
|
+
)
|
|
80
|
+
return self.new_dataframe(
|
|
81
|
+
[x],
|
|
82
|
+
shape=shape,
|
|
83
|
+
dtypes=x.dtypes[:0],
|
|
84
|
+
index_value=index_value,
|
|
85
|
+
columns_value=columns_value,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def get_index_mapping(
|
|
90
|
+
cls,
|
|
91
|
+
index_label: Optional[List[str]],
|
|
92
|
+
raw_index_levels: List[Any],
|
|
93
|
+
) -> List[Any]:
|
|
94
|
+
def_labels = index_label or itertools.repeat(None)
|
|
95
|
+
def_labels = itertools.chain(def_labels, itertools.repeat(None))
|
|
96
|
+
names = raw_index_levels
|
|
97
|
+
if len(names) == 1:
|
|
98
|
+
default_labels = ["index"]
|
|
99
|
+
else:
|
|
100
|
+
default_labels = [f"level_{i}" for i in range(len(names))]
|
|
101
|
+
indexes = [
|
|
102
|
+
def_label or name or label
|
|
103
|
+
for def_label, name, label in zip(def_labels, names, default_labels)
|
|
104
|
+
]
|
|
105
|
+
return [x.lower() for x in indexes]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def to_odps_table(
|
|
109
|
+
df: TileableType,
|
|
110
|
+
table: Union[ODPSTable, str],
|
|
111
|
+
partition: Optional[str] = None,
|
|
112
|
+
partition_col: Union[None, str, List[str]] = None,
|
|
113
|
+
overwrite: bool = False,
|
|
114
|
+
unknown_as_string: Optional[bool] = True,
|
|
115
|
+
index: bool = True,
|
|
116
|
+
index_label: Union[None, str, List[str]] = None,
|
|
117
|
+
lifecycle: Optional[int] = None,
|
|
118
|
+
table_properties: Optional[dict] = None,
|
|
119
|
+
primary_key: Union[None, str, List[str]] = None,
|
|
120
|
+
odps_types: Optional[Dict[str, str]] = None,
|
|
121
|
+
):
|
|
122
|
+
"""
|
|
123
|
+
Write DataFrame object into a MaxCompute (ODPS) table.
|
|
124
|
+
|
|
125
|
+
You need to provide the name of the table to write to. If you want to store
|
|
126
|
+
data into a specific partitioned of a table, argument `partition` can be used.
|
|
127
|
+
You can also use `partition_col` to specify DataFrame columns as partition
|
|
128
|
+
columns, and data in the DataFrame will be grouped by these columns and
|
|
129
|
+
inserted into partitions the values of these columns.
|
|
130
|
+
|
|
131
|
+
If the table does not exist, `to_odps_table` will create one.
|
|
132
|
+
|
|
133
|
+
Column names for indexes is determined by `index_label` argument. If the
|
|
134
|
+
argument is absent, names of the levels is used if they are not None, or
|
|
135
|
+
default names will be used. The default name for indexes with only one level
|
|
136
|
+
will be `index`, and for indexes with multiple levels, the name will be
|
|
137
|
+
`level_x` while x is the index of the level.
|
|
138
|
+
|
|
139
|
+
Parameters
|
|
140
|
+
----------
|
|
141
|
+
table: str
|
|
142
|
+
Name ot the table to write DataFrame into
|
|
143
|
+
partition: Optional[str]
|
|
144
|
+
Spec of the partition to write to, can be 'pt1=xxx,pt2=yyy'
|
|
145
|
+
partition_col: Union[None, str, List[str]]
|
|
146
|
+
Name of columns in DataFrame as partition columns.
|
|
147
|
+
overwrite: bool
|
|
148
|
+
Overwrite data if the table / partition already exists.
|
|
149
|
+
unknown_as_string: bool
|
|
150
|
+
If True, object type in the DataFrame will be treated as strings.
|
|
151
|
+
Otherwise errors might be raised.
|
|
152
|
+
index: bool
|
|
153
|
+
If True, indexes will be stored. Otherwise they are ignored.
|
|
154
|
+
index_label: Union[None, str, List[str]]
|
|
155
|
+
Specify column names for index levels. If absent, level names or default
|
|
156
|
+
names will be used.
|
|
157
|
+
lifecycle: Optional[int]
|
|
158
|
+
Specify lifecycle of the output table.
|
|
159
|
+
table_properties: Optional[dict]
|
|
160
|
+
Specify properties of the output table.
|
|
161
|
+
primary_key: Union[None, str, List[str]]
|
|
162
|
+
If provided and target table does not exist, target table
|
|
163
|
+
will be a delta table with columns specified in this argument
|
|
164
|
+
as primary key.
|
|
165
|
+
|
|
166
|
+
Returns
|
|
167
|
+
-------
|
|
168
|
+
result: DataFrame
|
|
169
|
+
Stub DataFrame for execution.
|
|
170
|
+
|
|
171
|
+
Notes
|
|
172
|
+
-----
|
|
173
|
+
`to_odps_table` returns a stub object for execution. The result returned is
|
|
174
|
+
not reusable.
|
|
175
|
+
|
|
176
|
+
Examples
|
|
177
|
+
--------
|
|
178
|
+
|
|
179
|
+
"""
|
|
180
|
+
odps_entry = ODPS.from_global() or ODPS.from_environments()
|
|
181
|
+
is_schema_enabled = (
|
|
182
|
+
options.session.enable_schema or odps_entry.is_schema_namespace_enabled()
|
|
183
|
+
)
|
|
184
|
+
if isinstance(table, ODPSTable):
|
|
185
|
+
table = table.full_table_name
|
|
186
|
+
elif is_schema_enabled and "." not in table:
|
|
187
|
+
default_schema = (
|
|
188
|
+
options.session.default_schema or odps_entry.schema or "default"
|
|
189
|
+
)
|
|
190
|
+
table = default_schema + "." + table
|
|
191
|
+
|
|
192
|
+
if isinstance(index_label, str):
|
|
193
|
+
index_label = [index_label]
|
|
194
|
+
if isinstance(partition_col, str):
|
|
195
|
+
partition_col = [partition_col]
|
|
196
|
+
|
|
197
|
+
if index_label and len(index_label) != len(df.index.names):
|
|
198
|
+
raise ValueError(
|
|
199
|
+
f"index_label needs {len(df.index.nlevels)} labels "
|
|
200
|
+
f"but it only have {len(index_label)}"
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# check if table partition columns conflicts with dataframe columns
|
|
204
|
+
table_cols = set(build_dataframe_table_meta(df).table_column_names)
|
|
205
|
+
partition_col_set = (
|
|
206
|
+
set(x.lower() for x in PartitionSpec(partition).keys()) if partition else set()
|
|
207
|
+
)
|
|
208
|
+
if partition:
|
|
209
|
+
partition_intersect = partition_col_set & table_cols
|
|
210
|
+
if partition_intersect:
|
|
211
|
+
raise ValueError(
|
|
212
|
+
f"Data column(s) {partition_intersect} in the dataframe"
|
|
213
|
+
" cannot be used in parameter 'partition'."
|
|
214
|
+
" Use 'partition_col' instead."
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
if index:
|
|
218
|
+
index_cols = set(
|
|
219
|
+
DataFrameToODPSTable.get_index_mapping(index_label, df.index.names)
|
|
220
|
+
)
|
|
221
|
+
index_table_intersect = index_cols & table_cols
|
|
222
|
+
if index_table_intersect:
|
|
223
|
+
raise ValueError(
|
|
224
|
+
f"Index column(s) {index_table_intersect} conflict with "
|
|
225
|
+
f"column(s) of the input dataframe."
|
|
226
|
+
)
|
|
227
|
+
index_partition_intersect = index_cols & partition_col_set
|
|
228
|
+
if index_partition_intersect:
|
|
229
|
+
raise ValueError(
|
|
230
|
+
f"Index column(s) {index_partition_intersect} conflict "
|
|
231
|
+
f"with partition column(s)."
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
if partition_col:
|
|
235
|
+
partition_diff = set(x.lower() for x in partition_col) - table_cols
|
|
236
|
+
if partition_diff:
|
|
237
|
+
raise ValueError(
|
|
238
|
+
f"Partition column(s) {partition_diff}"
|
|
239
|
+
" is not the data column(s) of the input dataframe."
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
table_properties = table_properties or {}
|
|
243
|
+
if primary_key is not None:
|
|
244
|
+
table_properties["transactional"] = "true"
|
|
245
|
+
if odps_entry.exist_table(table):
|
|
246
|
+
table_obj = odps_entry.get_table(table)
|
|
247
|
+
if table_obj.is_transactional:
|
|
248
|
+
table_properties = table_properties or {}
|
|
249
|
+
table_properties["transactional"] = "true"
|
|
250
|
+
primary_key = primary_key or table_obj.primary_key or ()
|
|
251
|
+
if set(primary_key) != set(table_obj.primary_key or ()):
|
|
252
|
+
raise ValueError(
|
|
253
|
+
f"Primary keys between existing table {table} and "
|
|
254
|
+
f"provided arguments are not same."
|
|
255
|
+
)
|
|
256
|
+
if primary_key and not isinstance(primary_key, (list, tuple)):
|
|
257
|
+
primary_key = [primary_key]
|
|
258
|
+
|
|
259
|
+
if odps_types is None:
|
|
260
|
+
target_dtypes = df.dtypes
|
|
261
|
+
else:
|
|
262
|
+
odps_schema = OdpsSchema.from_dict(odps_types)
|
|
263
|
+
arrow_schema = odps_schema_to_arrow_schema(odps_schema)
|
|
264
|
+
pd_col_to_dtype = {
|
|
265
|
+
nm: ArrowDtype(tp) for nm, tp in zip(arrow_schema.names, arrow_schema.types)
|
|
266
|
+
}
|
|
267
|
+
target_dtypes_dict = OrderedDict(
|
|
268
|
+
[(col, pd_col_to_dtype.get(col, dt)) for col, dt in df.dtypes.items()]
|
|
269
|
+
)
|
|
270
|
+
target_dtypes = pd.Series(target_dtypes_dict)
|
|
271
|
+
|
|
272
|
+
op = DataFrameToODPSTable(
|
|
273
|
+
dtypes=target_dtypes,
|
|
274
|
+
table_name=table,
|
|
275
|
+
unknown_as_string=unknown_as_string,
|
|
276
|
+
partition_spec=partition,
|
|
277
|
+
partition_columns=partition_col,
|
|
278
|
+
overwrite=overwrite,
|
|
279
|
+
index=index,
|
|
280
|
+
index_label=index_label,
|
|
281
|
+
lifecycle=lifecycle or options.session.table_lifecycle,
|
|
282
|
+
table_properties=table_properties or None,
|
|
283
|
+
primary_key=primary_key or None,
|
|
284
|
+
)
|
|
285
|
+
return op(df)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import BoolField, DictField, StringField
|
|
17
|
+
from ..datasource.read_parquet import check_engine
|
|
18
|
+
from ..utils import parse_index
|
|
19
|
+
from .core import LakeDataStore
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameToParquet(LakeDataStore):
|
|
23
|
+
_op_type_ = opcodes.TO_PARQUET
|
|
24
|
+
|
|
25
|
+
engine = StringField("engine")
|
|
26
|
+
index = BoolField("index")
|
|
27
|
+
additional_kwargs = DictField("additional_kwargs")
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def _get_path(cls, path, i):
|
|
31
|
+
if "*" not in path:
|
|
32
|
+
return path
|
|
33
|
+
return path.replace("*", str(i))
|
|
34
|
+
|
|
35
|
+
def __call__(self, df):
|
|
36
|
+
index_value = parse_index(df.index_value.to_pandas()[:0], df)
|
|
37
|
+
columns_value = parse_index(df.columns_value.to_pandas()[:0], store_data=True)
|
|
38
|
+
return self.new_dataframe(
|
|
39
|
+
[df],
|
|
40
|
+
shape=(0, 0),
|
|
41
|
+
dtypes=df.dtypes[:0],
|
|
42
|
+
index_value=index_value,
|
|
43
|
+
columns_value=columns_value,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def to_parquet(
|
|
48
|
+
df,
|
|
49
|
+
path,
|
|
50
|
+
engine="auto",
|
|
51
|
+
compression="snappy",
|
|
52
|
+
index=None,
|
|
53
|
+
partition_cols=None,
|
|
54
|
+
storage_options: dict = None,
|
|
55
|
+
**kwargs,
|
|
56
|
+
):
|
|
57
|
+
"""
|
|
58
|
+
Write a DataFrame to the binary parquet format, each chunk will be
|
|
59
|
+
written to a Parquet file.
|
|
60
|
+
|
|
61
|
+
Parameters
|
|
62
|
+
----------
|
|
63
|
+
path : str or file-like object
|
|
64
|
+
If path is a string with wildcard e.g. '/to/path/out-*.parquet',
|
|
65
|
+
`to_parquet` will try to write multiple files, for instance,
|
|
66
|
+
chunk (0, 0) will write data into '/to/path/out-0.parquet'.
|
|
67
|
+
If path is a string without wildcard, we will treat it as a directory.
|
|
68
|
+
|
|
69
|
+
engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
|
|
70
|
+
Parquet library to use. The default behavior is to try 'pyarrow',
|
|
71
|
+
falling back to 'fastparquet' if 'pyarrow' is unavailable.
|
|
72
|
+
|
|
73
|
+
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
|
|
74
|
+
Name of the compression to use. Use ``None`` for no compression.
|
|
75
|
+
|
|
76
|
+
index : bool, default None
|
|
77
|
+
If ``True``, include the dataframe's index(es) in the file output.
|
|
78
|
+
If ``False``, they will not be written to the file.
|
|
79
|
+
If ``None``, similar to ``True`` the dataframe's index(es)
|
|
80
|
+
will be saved. However, instead of being saved as values,
|
|
81
|
+
the RangeIndex will be stored as a range in the metadata so it
|
|
82
|
+
doesn't require much space and is faster. Other indexes will
|
|
83
|
+
be included as columns in the file output.
|
|
84
|
+
|
|
85
|
+
partition_cols : list, optional, default None
|
|
86
|
+
Column names by which to partition the dataset.
|
|
87
|
+
Columns are partitioned in the order they are given.
|
|
88
|
+
Must be None if path is not a string.
|
|
89
|
+
|
|
90
|
+
**kwargs
|
|
91
|
+
Additional arguments passed to the parquet library.
|
|
92
|
+
|
|
93
|
+
Examples
|
|
94
|
+
--------
|
|
95
|
+
>>> import maxframe.dataframe as md
|
|
96
|
+
>>> df = md.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
|
|
97
|
+
>>> df.to_parquet('*.parquet.gzip',
|
|
98
|
+
... compression='gzip').execute() # doctest: +SKIP
|
|
99
|
+
>>> md.read_parquet('*.parquet.gzip').execute() # doctest: +SKIP
|
|
100
|
+
col1 col2
|
|
101
|
+
0 1 3
|
|
102
|
+
1 2 4
|
|
103
|
+
|
|
104
|
+
>>> import io
|
|
105
|
+
>>> f = io.BytesIO()
|
|
106
|
+
>>> df.to_parquet(f).execute()
|
|
107
|
+
>>> f.seek(0)
|
|
108
|
+
0
|
|
109
|
+
>>> content = f.read()
|
|
110
|
+
"""
|
|
111
|
+
engine = check_engine(engine)
|
|
112
|
+
op = DataFrameToParquet(
|
|
113
|
+
path=path,
|
|
114
|
+
engine=engine,
|
|
115
|
+
compression=compression,
|
|
116
|
+
index=index,
|
|
117
|
+
partition_cols=partition_cols,
|
|
118
|
+
storage_options=storage_options,
|
|
119
|
+
additional_kwargs=kwargs,
|
|
120
|
+
)
|
|
121
|
+
return op(df)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import CachedAccessor
|
|
16
|
+
from .accessor import (
|
|
17
|
+
DataFrameMaxFrameAccessor,
|
|
18
|
+
IndexMaxFrameAccessor,
|
|
19
|
+
SeriesMaxFrameAccessor,
|
|
20
|
+
)
|
|
21
|
+
from .apply_chunk import (
|
|
22
|
+
DataFrameApplyChunk,
|
|
23
|
+
DataFrameApplyChunkOperator,
|
|
24
|
+
df_apply_chunk,
|
|
25
|
+
series_apply_chunk,
|
|
26
|
+
)
|
|
27
|
+
from .cartesian_chunk import cartesian_chunk
|
|
28
|
+
from .collect_kv import collect_kv
|
|
29
|
+
from .extract_kv import extract_kv
|
|
30
|
+
from .flatjson import series_flatjson
|
|
31
|
+
from .flatmap import df_flatmap, series_flatmap
|
|
32
|
+
from .map_reduce import map_reduce
|
|
33
|
+
from .rebalance import DataFrameRebalance, rebalance
|
|
34
|
+
from .reshuffle import DataFrameReshuffle, df_reshuffle
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _install():
|
|
38
|
+
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
39
|
+
|
|
40
|
+
DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
|
|
41
|
+
DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
42
|
+
DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
|
|
43
|
+
DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
44
|
+
DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
|
|
45
|
+
DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
|
|
46
|
+
DataFrameMaxFrameAccessor._register("rebalance", rebalance)
|
|
47
|
+
DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
|
|
48
|
+
|
|
49
|
+
SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
|
|
50
|
+
SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
51
|
+
SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
52
|
+
SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
|
|
53
|
+
SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
|
|
54
|
+
SeriesMaxFrameAccessor._register("rebalance", rebalance)
|
|
55
|
+
|
|
56
|
+
IndexMaxFrameAccessor._register("rebalance", rebalance)
|
|
57
|
+
|
|
58
|
+
if DataFrameMaxFrameAccessor._api_count:
|
|
59
|
+
for t in DATAFRAME_TYPE:
|
|
60
|
+
t.mf = CachedAccessor("mf", DataFrameMaxFrameAccessor)
|
|
61
|
+
if SeriesMaxFrameAccessor._api_count:
|
|
62
|
+
for t in SERIES_TYPE:
|
|
63
|
+
t.mf = CachedAccessor("mf", SeriesMaxFrameAccessor)
|
|
64
|
+
if IndexMaxFrameAccessor._api_count:
|
|
65
|
+
for t in INDEX_TYPE:
|
|
66
|
+
t.mf = CachedAccessor("mf", IndexMaxFrameAccessor)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
_install()
|
|
70
|
+
del _install
|