maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,2386 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
import operator
|
|
17
|
+
import weakref
|
|
18
|
+
from collections.abc import Iterable
|
|
19
|
+
from io import StringIO
|
|
20
|
+
from typing import Any, Dict, List, Tuple, Union
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
import pandas as pd
|
|
24
|
+
|
|
25
|
+
from ..core import (
|
|
26
|
+
ENTITY_TYPE,
|
|
27
|
+
HasShapeTileable,
|
|
28
|
+
HasShapeTileableData,
|
|
29
|
+
OutputType,
|
|
30
|
+
Tileable,
|
|
31
|
+
_ExecuteAndFetchMixin,
|
|
32
|
+
is_build_mode,
|
|
33
|
+
register_output_types,
|
|
34
|
+
)
|
|
35
|
+
from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
|
|
36
|
+
from ..protocol import DataFrameTableMeta
|
|
37
|
+
from ..serialization.serializables import (
|
|
38
|
+
AnyField,
|
|
39
|
+
BoolField,
|
|
40
|
+
DataTypeField,
|
|
41
|
+
DictField,
|
|
42
|
+
Int32Field,
|
|
43
|
+
IntervalArrayField,
|
|
44
|
+
ListField,
|
|
45
|
+
NDArrayField,
|
|
46
|
+
OneOfField,
|
|
47
|
+
ReferenceField,
|
|
48
|
+
Serializable,
|
|
49
|
+
SeriesField,
|
|
50
|
+
SliceField,
|
|
51
|
+
StringField,
|
|
52
|
+
)
|
|
53
|
+
from ..session import get_default_session
|
|
54
|
+
from ..utils import (
|
|
55
|
+
calc_nsplits,
|
|
56
|
+
ceildiv,
|
|
57
|
+
estimate_pandas_size,
|
|
58
|
+
on_serialize_numpy_type,
|
|
59
|
+
pd_release_version,
|
|
60
|
+
prevent_called_from_pandas,
|
|
61
|
+
tokenize,
|
|
62
|
+
)
|
|
63
|
+
from .typing_ import DataFrameType, IndexType, SeriesType
|
|
64
|
+
from .utils import (
|
|
65
|
+
ReprSeries,
|
|
66
|
+
apply_if_callable,
|
|
67
|
+
fetch_corner_data,
|
|
68
|
+
merge_index_value,
|
|
69
|
+
parse_index,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# True when the leading two components of ``pd_release_version`` compare lower
# than (2, 0), i.e. the installed pandas release is older than 2.0.
# NOTE(review): the name suggests this gates the legacy ``iteritems`` API that
# pandas dropped in 2.0 — confirm at the use sites.
_df_with_iteritems = pd_release_version[:2] < (2, 0)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class IndexValue(Serializable):
|
|
76
|
+
"""
|
|
77
|
+
Meta class for index, held by IndexData, SeriesData and DataFrameData
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
__slots__ = ()
|
|
81
|
+
|
|
82
|
+
class IndexBase(Serializable):
    """
    Common serializable fields for the index metadata variants, plus a
    generic conversion of those variants back into a pandas index.
    """

    _key = StringField("key")  # to identify if the index is the same
    # Ordering / uniqueness flags.
    _is_monotonic_increasing = BoolField("is_monotonic_increasing")
    _is_monotonic_decreasing = BoolField("is_monotonic_decreasing")
    _is_unique = BoolField("is_unique")
    # Value bounds; numpy scalars go through ``on_serialize_numpy_type``
    # before serialization. The ``*_close`` flags presumably tell whether
    # each bound is inclusive — TODO confirm.
    _max_val = AnyField("max_val", on_serialize=on_serialize_numpy_type)
    _max_val_close = BoolField("max_val_close")
    _min_val = AnyField("min_val", on_serialize=on_serialize_numpy_type)
    _min_val_close = BoolField("min_val_close")

    @property
    def is_monotonic_increasing(self):
        """Stored ``is_monotonic_increasing`` flag."""
        return self._is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        """Stored ``is_monotonic_decreasing`` flag."""
        return self._is_monotonic_decreasing

    @property
    def is_unique(self):
        """Stored ``is_unique`` flag."""
        return self._is_unique

    @property
    def min_val(self):
        """Stored ``min_val`` field."""
        return self._min_val

    @property
    def min_val_close(self):
        """Stored ``min_val_close`` flag."""
        return self._min_val_close

    @property
    def max_val(self):
        """Stored ``max_val`` field."""
        return self._max_val

    @property
    def max_val_close(self):
        """Stored ``max_val_close`` flag."""
        return self._max_val_close

    @property
    def key(self):
        """Stored ``key`` field."""
        return self._key

    @property
    def inferred_type(self):
        """The base class reports no inferred type; subclasses override."""
        return None

    def to_pandas(self):
        """
        Build a pandas object from the fields that the concrete class adds.

        Fields whose attribute name also appears in the parent's ``_FIELDS``
        are ignored. The remaining ones are read from the instance, entries
        that are unset or ``None`` are dropped, and the rest are passed by
        field tag as keyword arguments. ``data`` falls back to an empty
        list. The callable used is ``_pd_initializer`` when the class
        defines one, otherwise the attribute of ``pd`` named like the class.
        """
        # NOTE(review): ``super(type(self), self)`` is resolved against the
        # runtime class, so only the immediate parent's fields are excluded.
        # Confirm no class sits more than one level below IndexBase.
        parent_fields = super(type(self), self)._FIELDS

        init_kwargs = {}
        for attr_name, field in self._FIELDS.items():
            if attr_name in parent_fields:
                continue
            value = getattr(self, attr_name, None)
            if value is not None:
                init_kwargs[field.tag] = value
        init_kwargs.setdefault("data", [])

        factory = getattr(self, "_pd_initializer", None)
        if factory is None:
            factory = getattr(pd, type(self).__name__)
        return factory(**init_kwargs)
|
|
142
|
+
|
|
143
|
+
class Index(IndexBase):
    """
    Metadata for a generic index: name, data and dtype.

    The field tags are the keyword names that ``IndexBase.to_pandas``
    passes on when rebuilding the pandas object.
    """

    _name = AnyField("name")
    _data = NDArrayField("data")
    _dtype = DataTypeField("dtype")

    @property
    def dtype(self):
        """Return the stored dtype, or ``None`` when the field is unset."""
        return getattr(self, "_dtype", None)

    @property
    def inferred_type(self):
        """
        Return ``"floating"`` for float dtypes and ``"integer"`` otherwise.

        Returns ``None`` when no dtype is stored, matching the
        ``IndexBase.inferred_type`` default. Without this guard the
        property failed with ``AttributeError`` on ``None.kind``.
        """
        dtype = self.dtype
        if dtype is None:
            return None
        return "floating" if dtype.kind == "f" else "integer"
|
|
155
|
+
|
|
156
|
+
class RangeIndex(IndexBase):
    """Metadata for a range index, described by a slice rather than data."""

    _name = AnyField("name")
    _slice = SliceField("slice")
    _dtype = DataTypeField("dtype")

    @property
    def slice(self):
        """Stored slice holding the start / stop / step of the range."""
        return self._slice

    @property
    def dtype(self):
        """Return the stored dtype, defaulting to ``np.intc`` when unset."""
        # NOTE(review): pandas reports int64 for RangeIndex; confirm that the
        # platform C-int default is intended here.
        fallback = np.dtype(np.intc)
        return getattr(self, "_dtype", fallback)

    def to_pandas(self):
        """Rebuild the ``pd.RangeIndex`` from the stored slice and name."""
        bounds = self._slice
        index_name = getattr(self, "_name", None)
        return pd.RangeIndex(
            start=bounds.start,
            stop=bounds.stop,
            step=bounds.step,
            name=index_name,
        )
|
|
174
|
+
|
|
175
|
+
class CategoricalIndex(IndexBase):
    """Metadata for a categorical index."""

    # Tags double as keyword names in ``IndexBase.to_pandas``.
    _name = AnyField("name")
    _data = NDArrayField("data")
    _categories = AnyField("categories")
    _ordered = BoolField("ordered")

    @property
    def inferred_type(self):
        """Always ``"categorical"``."""
        return "categorical"
|
|
184
|
+
|
|
185
|
+
class IntervalIndex(IndexBase):
    """Metadata for an interval index."""

    # Tags double as keyword names in ``IndexBase.to_pandas``.
    _name = AnyField("name")
    _data = IntervalArrayField("data")
    _closed = StringField("closed")

    @property
    def inferred_type(self):
        """Always ``"interval"``."""
        return "interval"
|
|
193
|
+
|
|
194
|
+
class DatetimeIndex(IndexBase):
    """Metadata for a datetime index."""

    # Tags double as keyword names in ``IndexBase.to_pandas``; unset or
    # ``None`` entries are dropped there before the pandas call.
    _name = AnyField("name")
    _data = NDArrayField("data")
    _freq = AnyField("freq")
    _start = AnyField("start")
    _periods = AnyField("periods")
    _end = AnyField("end")
    _closed = AnyField("closed")
    _tz = AnyField("tz")
    _ambiguous = AnyField("ambiguous")
    _dayfirst = BoolField("dayfirst")
    _yearfirst = BoolField("yearfirst")

    @property
    def inferred_type(self):
        """Always ``"datetime64"``."""
        return "datetime64"

    @property
    def freq(self):
        """Return the stored frequency, or ``None`` when the field is unset."""
        try:
            return self._freq
        except AttributeError:
            return None
|
|
214
|
+
|
|
215
|
+
class TimedeltaIndex(IndexBase):
    """Metadata for a timedelta index."""

    # Tags double as keyword names in ``IndexBase.to_pandas``; unset or
    # ``None`` entries are dropped there before the pandas call.
    _name = AnyField("name")
    _data = NDArrayField("data")
    _unit = AnyField("unit")
    _freq = AnyField("freq")
    _start = AnyField("start")
    _periods = AnyField("periods")
    _end = AnyField("end")
    _closed = AnyField("closed")

    @property
    def inferred_type(self):
        """Always ``"timedelta64"``."""
        return "timedelta64"
|
|
228
|
+
|
|
229
|
+
class PeriodIndex(IndexBase):
    """Metadata for a period index."""

    # Tags double as keyword names in ``IndexBase.to_pandas``; unset or
    # ``None`` entries are dropped there before the pandas call.
    _name = AnyField("name")
    _data = NDArrayField("data")
    _freq = AnyField("freq")
    _start = AnyField("start")
    _periods = AnyField("periods")
    _end = AnyField("end")
    # Calendar component fields.
    _year = AnyField("year")
    _month = AnyField("month")
    _quarter = AnyField("quarter")
    _day = AnyField("day")
    _hour = AnyField("hour")
    _minute = AnyField("minute")
    _second = AnyField("second")
    _tz = AnyField("tz")
    _dtype = DataTypeField("dtype")

    @property
    def inferred_type(self):
        """Always ``"period"``."""
        return "period"
|
|
249
|
+
|
|
250
|
+
class Int64Index(IndexBase):
    """Metadata for an integer index, rebuilt through ``pd.Index``."""

    # ``IndexBase.to_pandas`` calls this instead of looking up
    # ``pd.Int64Index`` by class name (presumably because newer pandas no
    # longer exposes that name — TODO confirm).
    _pd_initializer = pd.Index

    _name = AnyField("name")
    _data = NDArrayField("data")
    _dtype = DataTypeField("dtype")

    @property
    def dtype(self):
        """Return the stored dtype, or ``None`` when the field is unset."""
        try:
            return self._dtype
        except AttributeError:
            return None

    @property
    def inferred_type(self):
        """Always ``"integer"``."""
        return "integer"
|
|
264
|
+
|
|
265
|
+
class UInt64Index(IndexBase):
    """Index description whose ``inferred_type`` is ``"integer"``."""

    # pandas class associated with this description.
    # NOTE(review): presumably used by IndexBase to rebuild a pandas object --
    # confirm against IndexBase, which is not visible here.
    _pd_initializer = pd.Index

    # Declared fields (order kept as in the original).
    _name = AnyField("name")
    _data = NDArrayField("data")
    _dtype = DataTypeField("dtype")

    @property
    def dtype(self):
        """Return ``_dtype``, or ``None`` when the attribute is not set."""
        return getattr(self, "_dtype", None)

    @property
    def inferred_type(self):
        """Return the constant label ``"integer"``."""
        return "integer"
|
|
279
|
+
|
|
280
|
+
class Float64Index(IndexBase):
    """Index description whose ``inferred_type`` is ``"floating"``."""

    # pandas class associated with this description.
    # NOTE(review): presumably used by IndexBase to rebuild a pandas object --
    # confirm against IndexBase, which is not visible here.
    _pd_initializer = pd.Index

    # Declared fields (order kept as in the original).
    _name = AnyField("name")
    _data = NDArrayField("data")
    _dtype = DataTypeField("dtype")

    @property
    def dtype(self):
        """Return ``_dtype``, or ``None`` when the attribute is not set."""
        return getattr(self, "_dtype", None)

    @property
    def inferred_type(self):
        """Return the constant label ``"floating"``."""
        return "floating"
|
|
294
|
+
|
|
295
|
+
class MultiIndex(IndexBase):
    """Index description for multi-level indexes (``inferred_type`` "mixed")."""

    # Declared fields (order kept as in the original).
    _names = ListField("names", on_serialize=list)
    _dtypes = ListField("dtypes", on_serialize=list)
    _data = NDArrayField("data")
    _sortorder = Int32Field("sortorder")

    @property
    def inferred_type(self):
        """Return the constant label ``"mixed"``."""
        return "mixed"

    @property
    def names(self) -> list:
        """Return the stored level names."""
        return self._names

    @property
    def dtypes(self) -> pd.Series:
        """Return the level dtypes as a Series indexed by the level names."""
        return pd.Series(self._dtypes, index=self._names)

    def to_pandas(self):
        """
        Build a ``pd.MultiIndex`` from this description.

        When ``_data`` is set, its rows are converted to tuples and passed to
        ``pd.MultiIndex.from_tuples``; otherwise an empty index is built with
        one empty array per entry of ``_dtypes``.
        """
        stored_rows = getattr(self, "_data", None)
        sort_order = getattr(self, "_sortorder", None)

        if stored_rows is not None:
            as_tuples = [tuple(row) for row in stored_rows]
            return pd.MultiIndex.from_tuples(
                as_tuples, sortorder=sort_order, names=self._names
            )

        def _empty_level(level_dtype):
            # numpy rejects some dtypes with TypeError; pandas arrays are the
            # fallback in that case.
            try:
                return np.array([], dtype=level_dtype)
            except TypeError:  # pragma: no cover
                return pd.array([], dtype=level_dtype)

        empty_levels = [_empty_level(level_dtype) for level_dtype in self._dtypes]
        return pd.MultiIndex.from_arrays(
            empty_levels,
            sortorder=sort_order,
            names=self._names,
        )
|
|
332
|
+
|
|
333
|
+
# Holds the concrete index description. ``OneOfField`` receives one keyword
# per supported description class; the keyword names are the tags below.
_index_value = OneOfField(
    "index_value",
    index=Index,
    range_index=RangeIndex,
    categorical_index=CategoricalIndex,
    interval_index=IntervalIndex,
    datetime_index=DatetimeIndex,
    timedelta_index=TimedeltaIndex,
    period_index=PeriodIndex,
    int64_index=Int64Index,
    uint64_index=UInt64Index,
    float64_index=Float64Index,
    multi_index=MultiIndex,
)
|
|
347
|
+
|
|
348
|
+
def __maxframe_tokenize__(self):
    """Return the object used for tokenizing: the wrapped value's ``_key``."""
    # return object for tokenize
    return self._index_value._key
|
|
352
|
+
|
|
353
|
+
@property
def value(self):
    """Return the wrapped index description object (``_index_value``)."""
    wrapped = self._index_value
    return wrapped
|
|
356
|
+
|
|
357
|
+
@property
def key(self):
    """Return the ``key`` of the wrapped index description."""
    wrapped = self._index_value
    return wrapped.key
|
|
360
|
+
|
|
361
|
+
@property
def is_monotonic_increasing(self):
    """Forward ``is_monotonic_increasing`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.is_monotonic_increasing
|
|
364
|
+
|
|
365
|
+
@property
def is_monotonic_decreasing(self):
    """Forward ``is_monotonic_decreasing`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.is_monotonic_decreasing
|
|
368
|
+
|
|
369
|
+
@property
def is_monotonic_increasing_or_decreasing(self):
    """Return ``is_monotonic_increasing or is_monotonic_decreasing``."""
    increasing = self.is_monotonic_increasing
    # ``or`` semantics: the decreasing flag is only read when needed.
    return increasing or self.is_monotonic_decreasing
|
|
372
|
+
|
|
373
|
+
@property
def is_unique(self):
    """Forward ``is_unique`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.is_unique
|
|
376
|
+
|
|
377
|
+
@property
def min_val(self):
    """Forward ``min_val`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.min_val
|
|
380
|
+
|
|
381
|
+
@property
def min_val_close(self):
    """Forward ``min_val_close`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.min_val_close
|
|
384
|
+
|
|
385
|
+
@property
def max_val(self):
    """Forward ``max_val`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.max_val
|
|
388
|
+
|
|
389
|
+
@property
def max_val_close(self):
    """Forward ``max_val_close`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.max_val_close
|
|
392
|
+
|
|
393
|
+
@property
def min_max(self):
    """
    Return ``(min_val, min_val_close, max_val, max_val_close)`` read from the
    wrapped index description, in that order.
    """
    lower = self._index_value.min_val
    lower_close = self._index_value.min_val_close
    upper = self._index_value.max_val
    upper_close = self._index_value.max_val_close
    return (lower, lower_close, upper, upper_close)
|
|
401
|
+
|
|
402
|
+
@property
def name(self):
    """Return the wrapped description's ``_name``, or ``None`` if it has none."""
    wrapped = self._index_value
    return getattr(wrapped, "_name", None)
|
|
405
|
+
|
|
406
|
+
@property
def names(self):
    """
    Return the wrapped description's ``_names``; when it has no such
    attribute, return a one-element list holding ``self.name``.
    """
    wrapped = self._index_value
    single_level = [self.name]
    return getattr(wrapped, "_names", single_level)
|
|
409
|
+
|
|
410
|
+
@property
def inferred_type(self):
    """Forward ``inferred_type`` from the wrapped index description."""
    wrapped = self._index_value
    return wrapped.inferred_type
|
|
413
|
+
|
|
414
|
+
def has_value(self):
    """
    Tell whether the wrapped index description carries concrete values.

    For an instance of ``self.RangeIndex`` the answer is whether ``max_val``
    is not NaN; for any other description it is whether a non-None ``_data``
    attribute is present.
    """
    wrapped = self._index_value
    if isinstance(wrapped, self.RangeIndex):
        return not np.isnan(wrapped.max_val)
    return getattr(wrapped, "_data", None) is not None
|
|
423
|
+
|
|
424
|
+
def to_pandas(self):
    """Return the result of ``to_pandas()`` on the wrapped index description."""
    wrapped = self._index_value
    return wrapped.to_pandas()
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class DtypesValue(Serializable):
    """
    Meta class for dtypes.

    Stores a dtypes Series together with a key; when no key is supplied the
    key is computed by tokenizing the stored value.
    """

    __slots__ = ()

    _key = StringField("key")
    _value = SeriesField("value")

    def __init__(self, key=None, value=None, **kw):
        super().__init__(_key=key, _value=value, **kw)
        key_missing = self._key is None
        if key_missing:
            # Derive the key from the content so equal dtypes share a key.
            self._key = tokenize(self._value)

    @property
    def key(self):
        """Return the stored key."""
        return self._key

    @property
    def value(self):
        """Return the stored dtypes value."""
        return self._value
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def refresh_index_value(tileable: ENTITY_TYPE):
    """
    Rebuild ``tileable._index_value`` from the index values of its chunks.

    Only chunks that are one-dimensional, or whose second index component is
    0, contribute. The merged result keeps the key of the tileable's current
    index value.
    """
    chunk_index_values = {
        chunk.index: chunk.index_value
        for chunk in tileable.chunks
        if chunk.ndim == 1 or chunk.index[1] == 0
    }
    merged = merge_index_value(chunk_index_values, store_data=False)
    # keep key as original index_value's
    merged._index_value._key = tileable.index_value.key
    tileable._index_value = merged
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def refresh_dtypes(tileable: ENTITY_TYPE):
    """
    Rebuild dtypes-related attributes of ``tileable`` from its chunks.

    The dtypes of every chunk whose first index component is 0 are
    concatenated, then ``_dtypes``, ``_columns_value`` and ``_dtypes_value``
    are reassigned from the concatenated result.
    """
    first_row_dtypes = []
    for chunk in tileable.chunks:
        if chunk.index[0] == 0:
            first_row_dtypes.append(chunk.dtypes_value.value)

    merged_dtypes = pd.concat(first_row_dtypes)
    tileable._dtypes = merged_dtypes
    tileable._columns_value = parse_index(merged_dtypes.index, store_data=True)
    tileable._dtypes_value = DtypesValue(
        key=tokenize(merged_dtypes), value=merged_dtypes
    )
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
# Attribute names, gathered into ``_lazy_chunk_meta_properties`` below.
# NOTE(review): presumably these attributes are attached lazily to chunks and
# refer back to their tileable -- confirm at the use sites, which are not
# visible in this part of the file.
_tileable_key_property = "_tileable_key"
_tileable_dtypes_property = "_tileable_dtypes"
_tileable_index_value_property = "_tileable_index_value"
_tileable_columns_value_property = "_tileable_columns_value"
_nsplits_property = "_tileable_nsplits"
# Tuple of all the names above, in declaration order.
_lazy_chunk_meta_properties = (
    _tileable_key_property,
    _tileable_dtypes_property,
    _tileable_index_value_property,
    _tileable_columns_value_property,
    _nsplits_property,
)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
    """
    Return the cumulative sizes of ``nsplit`` with a leading 0.

    For example ``(2, 3, 4)`` gives ``[0, 2, 5, 9]``.
    """
    offsets = np.cumsum(nsplit).tolist()
    offsets.insert(0, 0)
    return offsets
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def calc_cum_nsplits(nsplits: Tuple[Tuple[int]]) -> Tuple[List[int], ...]:
    """
    Compute cumulative split offsets for every axis.

    Returns a tuple with one list per entry of ``nsplits``; each list is the
    result of ``_calc_cum_nsplit`` for that entry.
    """
    return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
@functools.lru_cache(maxsize=128)
def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
    """
    Cached variant of ``_calc_cum_nsplit``.

    ``nsplit`` is the cache key and therefore must be hashable. The returned
    list object is the one kept in the cache, so callers should only read it.
    """
    offsets = _calc_cum_nsplit(nsplit)
    return offsets
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
    """
    Return the slice covered by the ``index``-th split along one axis.

    ``nsplit`` is converted to a tuple when needed so that it can be used as
    the key of the cached cumulative-offset lookup.
    """
    cache_key = nsplit if isinstance(nsplit, tuple) else tuple(nsplit)
    offsets = _get_cum_nsplit(cache_key)
    begin = offsets[index]
    end = offsets[index + 1]
    return slice(begin, end)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _on_deserialize_index_value(index_value):
    """
    Deserialization hook for index values.

    Returns ``index_value`` unchanged when it is not None and reading its
    ``value`` attribute succeeds; returns ``None`` when it is None or when the
    attribute access raises ``AttributeError``.
    """
    # hasattr() swallows AttributeError only, exactly like the explicit
    # try/except form.
    if index_value is not None and hasattr(index_value, "value"):
        return index_value
    return None
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
class _ToPandasMixin(_ExecuteAndFetchMixin):
    """Mixin adding ``to_pandas``, which delegates to ``_execute_and_fetch``."""

    __slots__ = ()

    def to_pandas(self, session=None, **kw):
        """Execute and fetch the result, forwarding ``session`` and ``kw``."""
        fetched = self._execute_and_fetch(session=session, **kw)
        return fetched
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
class _BatchedFetcher:
    """Mixin that fetches the rows of an executed object in batches."""

    __slots__ = ()

    def _iter(self, batch_size=None, session=None, **kw):
        """
        Yield fetched results batch by batch.

        Parameters
        ----------
        batch_size : int, optional
            Number of rows per batch. When omitted, the first 1000 rows are
            fetched and their estimated size is used to pick a batch size
            aiming at roughly 50 MiB per batch.
        session : optional
            Session forwarded to ``_fetch``.
        **kw
            Extra keyword arguments forwarded to ``_fetch``.
        """
        from .indexing.iloc import iloc

        size = self.shape[0]

        if batch_size is not None:
            n_batch = ceildiv(size, batch_size)
            if n_batch > 1:
                for i in range(n_batch):
                    batch_data = iloc(self)[batch_size * i : batch_size * (i + 1)]
                    yield batch_data._fetch(session=session, **kw)
            else:
                yield self._fetch(session=session, **kw)
            return

        # if batch_size is not specified, use first batch to estimate
        # batch_size.
        default_batch_bytes = 50 * 1024**2
        first_batch = 1000

        if size < first_batch:
            yield self._fetch(session=session, **kw)
            return

        batch_data = iloc(self)[:first_batch]
        first_batch_data = batch_data._fetch(session=session, **kw)
        yield first_batch_data

        data_size = estimate_pandas_size(first_batch_data)
        # Guard both divisions: a zero size estimate must not raise, and the
        # derived batch size must be at least one row so that ceildiv() below
        # never divides by zero.
        batch_size = max(
            1, int(default_batch_bytes / max(data_size, 1) * first_batch)
        )
        # Rows left after the first batch (previously a hard-coded 1000).
        n_batch = ceildiv(size - first_batch, batch_size)
        for i in range(n_batch):
            start = first_batch + batch_size * i
            batch_data = iloc(self)[start : start + batch_size]
            yield batch_data._fetch(session=session, **kw)

    def iterbatch(self, batch_size=None, session=None, **kw):
        """
        Execute the object, then return an iterator over fetched batches.

        ``kw`` is forwarded to ``execute`` only.

        Raises
        ------
        ValueError
            When called under build mode.
        """
        # stop triggering execution under build mode
        if is_build_mode():
            raise ValueError("Cannot fetch data under build mode")

        # trigger execution
        self.execute(session=session, **kw)
        return self._iter(batch_size=batch_size, session=session)

    def fetch(self, session=None, **kw):
        """
        Fetch the whole result, concatenating batches when there are several.

        ``batch_size`` may be passed through ``kw``; it is removed before the
        remaining keyword arguments are forwarded.
        """
        from .indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem

        batch_size = kw.pop("batch_size", None)
        if isinstance(self.op, (DataFrameIlocGetItem, SeriesIlocGetItem)):
            # see GH#1871
            # already iloc, do not trigger batch fetch
            return self._fetch(session=session, **kw)

        batches = list(self._iter(batch_size=batch_size, session=session, **kw))
        return pd.concat(batches) if len(batches) > 1 else batches[0]

    def fetch_infos(self, fields=None, session=None, **kw):
        """Delegate to ``_fetch_infos`` with the same arguments."""
        return self._fetch_infos(fields=fields, session=session, **kw)
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class IndexData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data object for an index (``type_name`` is ``"Index"``)."""

    __slots__ = ()
    type_name = "Index"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _names = AnyField("names")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        names=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _names=names,
            _index_value=index_value,
            **kw,
        )

    @property
    def params(self) -> Dict[str, Any]:
        """Return the properties needed to rebuild a new tileable object."""
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "name": self.name,
            "index_value": self.index_value,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """
        Update shape, dtype, index_value and name from ``new_params``.

        Entries that are missing or None leave the current value untouched.

        Raises
        ------
        TypeError
            When ``new_params`` contains any other key.
        """
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        index_value = params.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value
        name = params.pop("name", None)
        if name is not None:
            self._name = name
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        """Refresh shape and chunk slices after the chunks were updated."""
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # if self._dtype is None:
        #     self._dtype = self.chunks[0].dtype
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """No-op for index data."""
        pass

    def _to_str(self, representation=False):
        """
        Build the text used by ``__str__`` / ``__repr__``.

        Under build mode, or when the object was never executed, only the
        operator (and key, for the representation form) is shown; otherwise
        the data is fetched from the latest executed session and rendered.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                # The closing ">" was missing before; it now mirrors the
                # series representation format.
                return f"Index <op={type(self._op).__name__}, key={self.key}>"
            else:
                return f"Index(op={type(self._op).__name__})"
        else:
            data = self.fetch(session=self._executed_sessions[-1])
            # Fall back to str() only when repr() is empty; repr() is now
            # computed a single time.
            return repr(data) or str(data)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def _to_maxframe_tensor(self, dtype=None, order="K", extract_multi_index=False):
        """Convert to a tensor and cast it; ``dtype`` defaults to the tensor's own."""
        tensor = self.to_tensor(extract_multi_index=extract_multi_index)
        dtype = dtype if dtype is not None else tensor.dtype
        return tensor.astype(dtype=dtype, order=order, copy=False)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._to_maxframe_tensor(dtype=dtype, order=order)

    @property
    def dtype(self):
        """Return ``_dtype`` when set and truthy, otherwise the operator's dtype."""
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def name(self):
        """Return the stored index name."""
        return self._name

    @property
    def names(self):
        """Return ``_names`` when set and truthy, otherwise ``[self.name]``."""
        return getattr(self, "_names", None) or [self.name]

    @property
    def nlevels(self) -> int:
        """Return the number of entries in ``names``."""
        return len(self.names)

    @property
    def index_value(self) -> IndexValue:
        """Return the stored index value."""
        return self._index_value

    @property
    def inferred_type(self):
        """Forward ``inferred_type`` from the stored index value."""
        return self._index_value.inferred_type

    def to_tensor(self, dtype=None, extract_multi_index=False):
        """Convert this index into a tensor via ``from_index``."""
        from ..tensor.datasource.from_dataframe import from_index

        return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame whose column(s) contain this index.

        Parameters
        ----------
        index : bool, default True
            Use this index as the index of the returned DataFrame.
        name : object, default None
            Column name replacing the index name. For a multi-level index it
            must be a non-string iterable with one entry per level.

        Raises
        ------
        TypeError
            Multi-level index and ``name`` is a string or not iterable.
        ValueError
            Multi-level index and ``name`` has the wrong number of entries.
        """
        from . import dataframe_from_tensor

        if isinstance(self.index_value.value, IndexValue.MultiIndex):
            old_names = self.index_value.value.names

            if name is not None and (
                not isinstance(name, Iterable) or isinstance(name, str)
            ):
                raise TypeError("'name' must be a list / sequence of column names.")

            level_names = list(name if name is not None else old_names)
            if len(level_names) != len(old_names):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )

            # The names passed by the caller take precedence over the existing
            # level names (as pandas does); a level without a name falls back
            # to its position. When ``name`` is None, ``level_names`` already
            # holds the existing names.
            columns = [
                new if new is not None else idx
                for idx, new in enumerate(level_names)
            ]
        else:
            columns = [name or self.name or 0]
        index_ = self if index else None
        return dataframe_from_tensor(
            self._to_maxframe_tensor(extract_multi_index=True),
            index=index_,
            columns=columns,
            check_index_size=False,
        )

    def to_series(self, index=None, name=None):
        """Create a Series from this index via ``series_from_index``."""
        from . import series_from_index

        return series_from_index(self, index=index, name=name)

    @property
    def hasnans(self):
        """Return ``self.isna().any()``."""
        return self.isna().any()
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
class Index(HasShapeTileable, _ToPandasMixin):
    """User-facing index object wrapping an ``IndexData``."""

    __slots__ = "_df_or_series", "_parent_key", "_axis"
    _allow_data_type_ = (IndexData,)
    type_name = "Index"

    def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
        if data is None or isinstance(data, pd.Index):
            target_cls = cls
        else:
            # create corresponding Index class
            # according to type of index_value
            target_cls = globals()[type(data.index_value.value).__name__]
        return object.__new__(target_cls)

    def __len__(self):
        return len(self._data)

    def __class_getitem__(cls, item):
        return IndexType.from_getitem_args(item)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def _get_df_or_series(self):
        """Return the parent object if it is recorded and still alive, else None."""
        parent_ref = getattr(self, "_df_or_series", None)
        return parent_ref() if parent_ref is not None else None

    def _set_df_or_series(self, df_or_series, axis):
        """Remember the parent (weakly), its current key and the axis."""
        self._df_or_series = weakref.ref(df_or_series)
        self._parent_key = df_or_series.key
        self._axis = axis

    @property
    def T(self):
        """Return the transpose, which is by definition self."""
        return self

    @property
    def name(self):
        return self._data.name

    @name.setter
    def name(self, value):
        parent = self._get_df_or_series()
        parent_is_current = parent is not None and parent.key == self._parent_key
        if not parent_is_current:
            # No usable parent: rename this index only.
            self.rename(value, inplace=True)
            return
        parent.rename_axis(value, axis=self._axis, inplace=True)
        self.data = parent.axes[self._axis].data

    @property
    def names(self):
        return self._data.names

    @names.setter
    def names(self, value):
        parent = self._get_df_or_series()
        if parent is None:
            self.rename(value, inplace=True)
            return
        parent.rename_axis(value, axis=self._axis, inplace=True)
        self.data = parent.axes[self._axis].data

    @property
    def values(self):
        return self.to_tensor()

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, default None
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame().execute()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False).execute()
          animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo').execute()
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        return self._data.to_frame(index=index, name=name)

    def to_series(self, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.
        """
        return self._data.to_series(index=index, name=name)

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index([1, 2, 3, None])
        >>> idx.execute()
        Index([1.0, 2.0, 3.0, nan], dtype='float64')
        >>> idx.hasnans.execute()
        True
        """
        return self._data.hasnans
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
class RangeIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
class CategoricalIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
class IntervalIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
class DatetimeIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
class TimedeltaIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
class PeriodIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
class Int64Index(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
class UInt64Index(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
class Float64Index(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
class MultiIndex(Index):
    """``Index`` subclass that adds no attributes or behaviour of its own."""

    __slots__ = ()
|
|
960
|
+
|
|
961
|
+
|
|
962
|
+
class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Common tileable data implementation shared by series-like objects."""

    __slots__ = "_cache", "_accessors"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _index_value=index_value,
            **kw,
        )
        self._accessors = dict()

    def _get_params(self) -> Dict[str, Any]:
        """Return the properties needed to rebuild a new tileable object."""
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "name": self.name,
            "index_value": self.index_value,
        }

    def _set_params(self, new_params: Dict[str, Any]):
        """
        Update shape, dtype, index_value and name from ``new_params``.

        Missing or None entries leave the current value untouched; any other
        key raises ``TypeError``.
        """
        remaining = new_params.copy()
        # (param key, attribute) pairs, applied in this order.
        targets = (
            ("shape", "_shape"),
            ("dtype", "_dtype"),
            ("index_value", "_index_value"),
            ("name", "_name"),
        )
        for param_key, attr_name in targets:
            new_value = remaining.pop(param_key, None)
            if new_value is not None:
                setattr(self, attr_name, new_value)
        if remaining:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(remaining)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        """Refresh shape, chunk slices and a missing dtype after chunk updates."""
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        if self._dtype is None:
            self._dtype = getattr(self.chunks[0], "dtype", None)
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """No-op for series data."""
        pass

    def _to_str(self, representation=False):
        """
        Build the text used by ``__str__`` / ``__repr__``.

        Under build mode, or when never executed, only operator information is
        shown. Otherwise the corner data is fetched from the latest executed
        session and rendered under pandas' ``display.max_rows`` option.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return (
                    f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"
                )
            return f"{self.type_name}(op={type(self._op).__name__})"

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        max_rows = pd.get_option("display.max_rows")

        if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
            corner_max_rows = max_rows
        else:
            # make sure max_rows < corner_data
            corner_max_rows = corner_data.shape[0] - 1

        with pd.option_context("display.max_rows", corner_max_rows):
            if self.shape[0] <= max_rows:
                to_render = corner_data
            else:
                to_render = ReprSeries(corner_data, self.shape)
            rendered = repr(to_render) if representation else str(to_render)

        return rendered

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    @property
    def dtype(self):
        """Return ``_dtype`` when set and truthy, else the operator's dtype (or None)."""
        return getattr(self, "_dtype", None) or getattr(self.op, "dtype", None)

    @property
    def name(self):
        """Return the stored name."""
        return self._name

    @property
    def index_value(self):
        """Return the stored index value."""
        return self._index_value

    @property
    def index(self):
        """Build an index object from this data via ``from_tileable``."""
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def axes(self):
        """Return a one-element list holding ``index``."""
        return [self.index]

    @property
    def empty(self):
        """
        Tell whether the shape equals ``(0,)``.

        Raises
        ------
        ValueError
            When the shape contains NaN.
        """
        current_shape = self._shape
        if np.any(np.isnan(current_shape)):
            raise ValueError("Tileable object must be executed first")
        return current_shape == (0,)

    def to_tensor(self, dtype=None):
        """Convert this data into a tensor via ``from_series``."""
        from ..tensor.datasource.from_dataframe import from_series

        return from_series(self, dtype=dtype)
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
class SeriesData(_BatchedFetcher, BaseSeriesData):
    """Series data object with batched fetching (``type_name`` "Series")."""

    type_name = "Series"

    def __maxframe_tensor__(self, dtype=None, order="K"):
        as_tensor = self.to_tensor()
        target_dtype = as_tensor.dtype if dtype is None else dtype
        return as_tensor.astype(dtype=target_dtype, order=order, copy=False)

    def iteritems(self, batch_size=10000, session=None):
        """
        Lazily yield the items of every fetched batch.

        Each batch comes from ``iterbatch``; its ``iteritems`` or ``items``
        method is used depending on ``_df_with_iteritems``.
        """
        pandas_method = "iteritems" if _df_with_iteritems else "items"
        batches = self.iterbatch(batch_size=batch_size, session=session)
        for fetched in batches:
            yield from getattr(fetched, pandas_method)()

    items = iteritems

    def to_frame(self, name=None):
        """
        Convert to a single-column DataFrame.

        The column is named ``name``, falling back to this object's name and
        then to 0 when the earlier candidates are falsy.
        """
        from . import dataframe_from_tensor

        column = name or self.name or 0
        return dataframe_from_tensor(self, columns=[column])

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series([1, 2, 3, None])
        >>> s.execute()
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans.execute()
        True
        """
        missing_mask = self.isna()
        return missing_mask.any()
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
class Series(HasShapeTileable, _ToPandasMixin):
|
|
1151
|
+
__slots__ = ("_cache",)
|
|
1152
|
+
_allow_data_type_ = (SeriesData,)
|
|
1153
|
+
type_name = "Series"
|
|
1154
|
+
|
|
1155
|
+
def __class_getitem__(cls, item):
    """Support ``Series[...]`` by delegating to ``SeriesType.from_getitem_args``."""
    series_type = SeriesType.from_getitem_args(item)
    return series_type
|
|
1157
|
+
|
|
1158
|
+
def to_tensor(self, dtype=None):
    """Convert to a tensor by delegating to the underlying data object."""
    data = self._data
    return data.to_tensor(dtype=dtype)
|
|
1160
|
+
|
|
1161
|
+
def from_tensor(self, in_tensor, index=None, name=None):
    """Delegate to ``from_tensor`` of the underlying data object."""
    data = self._data
    return data.from_tensor(in_tensor, index=index, name=name)
|
|
1163
|
+
|
|
1164
|
+
@property
def T(self):
    """Return the transpose, which is by definition self."""
    transposed = self
    return transposed
|
|
1168
|
+
|
|
1169
|
+
@property
def ndim(self):
    """
    Return an int representing the number of axes / array dimensions.

    Return 1 if Series. Otherwise return 2 if DataFrame.

    See Also
    --------
    ndarray.ndim : Number of array dimensions.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
    >>> s.ndim
    1

    >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
    >>> df.ndim
    2
    """
    # The value itself comes from the parent class; this override exists to
    # attach the docstring above.
    return super().ndim
|
|
1192
|
+
|
|
1193
|
+
@property
|
|
1194
|
+
def index(self):
|
|
1195
|
+
"""
|
|
1196
|
+
The index (axis labels) of the Series.
|
|
1197
|
+
"""
|
|
1198
|
+
idx = self._data.index
|
|
1199
|
+
idx._set_df_or_series(self, 0)
|
|
1200
|
+
return idx
|
|
1201
|
+
|
|
1202
|
+
@index.setter
|
|
1203
|
+
def index(self, new_index):
|
|
1204
|
+
self.set_axis(new_index, axis=0, inplace=True)
|
|
1205
|
+
|
|
1206
|
+
@property
|
|
1207
|
+
def name(self):
|
|
1208
|
+
return self._data.name
|
|
1209
|
+
|
|
1210
|
+
@name.setter
|
|
1211
|
+
def name(self, val):
|
|
1212
|
+
from .indexing.rename import DataFrameRename
|
|
1213
|
+
|
|
1214
|
+
op = DataFrameRename(new_name=val, output_types=[OutputType.series])
|
|
1215
|
+
new_series = op(self)
|
|
1216
|
+
self.data = new_series.data
|
|
1217
|
+
|
|
1218
|
+
@property
|
|
1219
|
+
def dtype(self):
|
|
1220
|
+
"""
|
|
1221
|
+
Return the dtype object of the underlying data.
|
|
1222
|
+
"""
|
|
1223
|
+
return self._data.dtype
|
|
1224
|
+
|
|
1225
|
+
def copy(self, deep=True): # pylint: disable=arguments-differ
|
|
1226
|
+
"""
|
|
1227
|
+
Make a copy of this object's indices and data.
|
|
1228
|
+
|
|
1229
|
+
When ``deep=True`` (default), a new object will be created with a
|
|
1230
|
+
copy of the calling object's data and indices. Modifications to
|
|
1231
|
+
the data or indices of the copy will not be reflected in the
|
|
1232
|
+
original object (see notes below).
|
|
1233
|
+
|
|
1234
|
+
When ``deep=False``, a new object will be created without copying
|
|
1235
|
+
the calling object's data or index (only references to the data
|
|
1236
|
+
and index are copied). Any changes to the data of the original
|
|
1237
|
+
will be reflected in the shallow copy (and vice versa).
|
|
1238
|
+
|
|
1239
|
+
Parameters
|
|
1240
|
+
----------
|
|
1241
|
+
deep : bool, default True
|
|
1242
|
+
Make a deep copy, including a copy of the data and the indices.
|
|
1243
|
+
With ``deep=False`` neither the indices nor the data are copied.
|
|
1244
|
+
|
|
1245
|
+
Returns
|
|
1246
|
+
-------
|
|
1247
|
+
copy : Series or DataFrame
|
|
1248
|
+
Object type matches caller.
|
|
1249
|
+
"""
|
|
1250
|
+
if deep:
|
|
1251
|
+
return super().copy()
|
|
1252
|
+
else:
|
|
1253
|
+
return super()._view()
|
|
1254
|
+
|
|
1255
|
+
def __iter__(self):
|
|
1256
|
+
# prevent being called by pandas to make sure `__eq__` works
|
|
1257
|
+
prevent_called_from_pandas()
|
|
1258
|
+
return (tp[1] for tp in self.items())
|
|
1259
|
+
|
|
1260
|
+
def __len__(self):
|
|
1261
|
+
return len(self._data)
|
|
1262
|
+
|
|
1263
|
+
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
1264
|
+
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
1265
|
+
|
|
1266
|
+
def keys(self):
|
|
1267
|
+
"""
|
|
1268
|
+
Return alias for index.
|
|
1269
|
+
|
|
1270
|
+
Returns
|
|
1271
|
+
-------
|
|
1272
|
+
Index
|
|
1273
|
+
Index of the Series.
|
|
1274
|
+
"""
|
|
1275
|
+
return self.index
|
|
1276
|
+
|
|
1277
|
+
@property
|
|
1278
|
+
def values(self):
|
|
1279
|
+
return self.to_tensor()
|
|
1280
|
+
|
|
1281
|
+
def iteritems(self, batch_size=10000, session=None):
|
|
1282
|
+
"""
|
|
1283
|
+
Lazily iterate over (index, value) tuples.
|
|
1284
|
+
|
|
1285
|
+
This method returns an iterable tuple (index, value). This is
|
|
1286
|
+
convenient if you want to create a lazy iterator.
|
|
1287
|
+
|
|
1288
|
+
Returns
|
|
1289
|
+
-------
|
|
1290
|
+
iterable
|
|
1291
|
+
Iterable of tuples containing the (index, value) pairs from a
|
|
1292
|
+
Series.
|
|
1293
|
+
|
|
1294
|
+
See Also
|
|
1295
|
+
--------
|
|
1296
|
+
DataFrame.items : Iterate over (column name, Series) pairs.
|
|
1297
|
+
DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
|
|
1298
|
+
|
|
1299
|
+
Examples
|
|
1300
|
+
--------
|
|
1301
|
+
>>> import maxframe.dataframe as md
|
|
1302
|
+
>>> s = md.Series(['A', 'B', 'C'])
|
|
1303
|
+
>>> for index, value in s.items():
|
|
1304
|
+
... print(f"Index : {index}, Value : {value}")
|
|
1305
|
+
Index : 0, Value : A
|
|
1306
|
+
Index : 1, Value : B
|
|
1307
|
+
Index : 2, Value : C
|
|
1308
|
+
"""
|
|
1309
|
+
return self._data.iteritems(batch_size=batch_size, session=session)
|
|
1310
|
+
|
|
1311
|
+
items = iteritems
|
|
1312
|
+
|
|
1313
|
+
def to_frame(self, name=None):
|
|
1314
|
+
"""
|
|
1315
|
+
Convert Series to DataFrame.
|
|
1316
|
+
|
|
1317
|
+
Parameters
|
|
1318
|
+
----------
|
|
1319
|
+
name : object, default None
|
|
1320
|
+
The passed name should substitute for the series name (if it has
|
|
1321
|
+
one).
|
|
1322
|
+
|
|
1323
|
+
Returns
|
|
1324
|
+
-------
|
|
1325
|
+
DataFrame
|
|
1326
|
+
DataFrame representation of Series.
|
|
1327
|
+
|
|
1328
|
+
Examples
|
|
1329
|
+
--------
|
|
1330
|
+
>>> import maxframe.dataframe as md
|
|
1331
|
+
>>> s = md.Series(["a", "b", "c"], name="vals")
|
|
1332
|
+
>>> s.to_frame().execute()
|
|
1333
|
+
vals
|
|
1334
|
+
0 a
|
|
1335
|
+
1 b
|
|
1336
|
+
2 c
|
|
1337
|
+
"""
|
|
1338
|
+
return self._data.to_frame(name=name)
|
|
1339
|
+
|
|
1340
|
+
# def median(
|
|
1341
|
+
# self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
|
|
1342
|
+
# ):
|
|
1343
|
+
# """
|
|
1344
|
+
# Return the median of the values over the requested axis.
|
|
1345
|
+
#
|
|
1346
|
+
# Parameters
|
|
1347
|
+
# ----------
|
|
1348
|
+
# axis : {index (0)}
|
|
1349
|
+
# Axis or axes along which the medians are computed. The default
|
|
1350
|
+
# is to compute the median along a flattened version of the tensor.
|
|
1351
|
+
# A sequence of axes is supported since version 1.9.0.
|
|
1352
|
+
# skipna : bool, optional, default True
|
|
1353
|
+
# Exclude NA/null values when computing the result.
|
|
1354
|
+
# out : Tensor, default None
|
|
1355
|
+
# Output tensor in which to place the result. It must
|
|
1356
|
+
# have the same shape and buffer length as the expected output,
|
|
1357
|
+
# but the type (of the output) will be cast if necessary.
|
|
1358
|
+
# overwrite_input : bool, default False
|
|
1359
|
+
# Just for compatibility with Numpy, would not take effect.
|
|
1360
|
+
# keepdims : bool, default False
|
|
1361
|
+
# If this is set to True, the axes which are reduced are left
|
|
1362
|
+
# in the result as dimensions with size one. With this option,
|
|
1363
|
+
# the result will broadcast correctly against the original `arr`.
|
|
1364
|
+
#
|
|
1365
|
+
# Returns
|
|
1366
|
+
# -------
|
|
1367
|
+
# median : scalar
|
|
1368
|
+
# Return the median of the values over the requested axis.
|
|
1369
|
+
#
|
|
1370
|
+
# See Also
|
|
1371
|
+
# --------
|
|
1372
|
+
# tensor.mean, tensor.percentile
|
|
1373
|
+
#
|
|
1374
|
+
# Notes
|
|
1375
|
+
# -----
|
|
1376
|
+
# Given a vector ``V`` of length ``N``, the median of ``V`` is the
|
|
1377
|
+
# middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
|
|
1378
|
+
# ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
|
|
1379
|
+
# two middle values of ``V_sorted`` when ``N`` is even.
|
|
1380
|
+
#
|
|
1381
|
+
# Examples
|
|
1382
|
+
# --------
|
|
1383
|
+
# >>> import maxframe.dataframe as md
|
|
1384
|
+
# >>> a = md.Series([10, 7, 4, 3, 2, 1])
|
|
1385
|
+
# >>> a.median().execute()
|
|
1386
|
+
# 2.0
|
|
1387
|
+
# >>> mt.median(a).execute()
|
|
1388
|
+
# 3.5
|
|
1389
|
+
# >>> a = md.Series([10, 7, 4, None, 2, 1])
|
|
1390
|
+
# >>> a.median().execute()
|
|
1391
|
+
# 4.0
|
|
1392
|
+
# >>> a.median(skipna=False).execute()
|
|
1393
|
+
# nan
|
|
1394
|
+
# """
|
|
1395
|
+
# if skipna:
|
|
1396
|
+
# return statistics.median(
|
|
1397
|
+
# self.dropna(),
|
|
1398
|
+
# axis=None,
|
|
1399
|
+
# out=None,
|
|
1400
|
+
# overwrite_input=False,
|
|
1401
|
+
# keepdims=False,
|
|
1402
|
+
# )
|
|
1403
|
+
# else:
|
|
1404
|
+
# return statistics.median(
|
|
1405
|
+
# self, axis=None, out=None, overwrite_input=False, keepdims=False
|
|
1406
|
+
# )
|
|
1407
|
+
|
|
1408
|
+
|
|
1409
|
+
class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
    """
    Shared metadata holder for DataFrame-like tileable data.

    Keeps the serialized fields (``dtypes``, ``index_value``,
    ``columns_value``) plus three non-serialized caches declared in
    ``__slots__``: ``_accessors``, ``_dtypes_value`` and ``_dtypes_dict``.
    """

    __slots__ = "_accessors", "_dtypes_value", "_dtypes_dict"

    # optional fields
    _dtypes = SeriesField("dtypes")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )
    _columns_value = ReferenceField("columns_value", IndexValue)

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtypes=None,
        index_value=None,
        columns_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtypes=dtypes,
            _index_value=index_value,
            _columns_value=columns_value,
            **kw,
        )
        self._accessors = dict()
        self._dtypes_value = None
        self._dtypes_dict = None

    def __on_deserialize__(self):
        super().__on_deserialize__()
        # The caches are not serialized fields, so start from a clean state.
        self._accessors = dict()
        self._dtypes_value = None
        self._dtypes_dict = None

    def _get_params(self) -> Dict[str, Any]:
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtypes": self.dtypes,
            "index_value": self.index_value,
            "columns_value": getattr(self, "columns_value", None),
            "dtypes_value": getattr(self, "dtypes_value", None),
        }

    def _set_params(self, new_params: Dict[str, Any]):
        """
        Apply the recognised entries of ``new_params`` to this object.

        Entries whose value is ``None`` are ignored. When ``dtypes_value`` is
        given without ``dtypes`` and/or ``columns_value``, the missing ones
        are derived from it.

        Raises
        ------
        TypeError
            If ``new_params`` contains keys that are not recognised.
        """
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        index_value = params.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value
        dtypes = params.pop("dtypes", None)
        if dtypes is not None:
            self._dtypes = dtypes
            # The per-column lookup cache was built from the old dtypes.
            self._dtypes_dict = None
        columns_value = params.pop("columns_value", None)
        if columns_value is not None:
            self._columns_value = columns_value
        dtypes_value = params.pop("dtypes_value", None)
        if dtypes_value is not None:
            if dtypes is None:
                self._dtypes = dtypes_value.value
                self._dtypes_dict = None
            if columns_value is None:
                self._columns_value = parse_index(self._dtypes.index, store_data=True)
            self._dtypes_value = dtypes_value
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # refresh_dtypes(self)

    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
        """
        Replace the dtypes and update everything derived from them.

        Updates ``_columns_value``, ``_dtypes_value`` and the last entry of
        ``_shape`` (set to ``len(dtypes)``), and drops the per-column dtype
        lookup cache so it is rebuilt from the new dtypes on next use.
        """
        self._dtypes = dtypes
        self._columns_value = parse_index(dtypes.index, store_data=True)
        self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        # Invalidate the cache built from the previous dtypes.
        self._dtypes_dict = None
        new_shape = list(self._shape)
        new_shape[-1] = len(dtypes)
        self._shape = tuple(new_shape)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Refresh from ``table_meta.pd_column_dtypes``."""
        self.refresh_from_dtypes(table_meta.pd_column_dtypes)

    @property
    def dtypes(self):
        """Own ``_dtypes`` if set, otherwise the operator's ``dtypes`` (may be None)."""
        dt = getattr(self, "_dtypes", None)
        if dt is not None:
            return dt
        return getattr(self.op, "dtypes", None)

    @property
    def dtypes_value(self):
        """Cached ``DtypesValue`` wrapper, created lazily from ``dtypes``."""
        if self._dtypes_value is not None:
            return self._dtypes_value
        # TODO(qinxuye): when creating Dataframe,
        # dtypes_value instead of dtypes later must be passed into
        dtypes = self.dtypes
        if dtypes is not None:
            self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
            return self._dtypes_value

    @property
    def index_value(self):
        return self._index_value

    @property
    def columns_value(self):
        return self._columns_value

    @property
    def empty(self):
        """
        Whether any dimension of the shape is zero.

        Raises
        ------
        ValueError
            If the shape contains NaN, i.e. is not fully known yet.
        """
        shape = getattr(self, "_shape")
        if np.any(np.isnan(shape)):
            raise ValueError("Tileable object must be executed first")
        return 0 in shape

    def to_tensor(self, dtype=None):
        from ..tensor.datasource.from_dataframe import from_dataframe

        return from_dataframe(self, dtype=dtype)

    @property
    def index(self):
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def columns(self):
        from .datasource.index import from_pandas as from_pandas_index

        return from_pandas_index(self.dtypes.index, store_data=True)

    @property
    def axes(self):
        return [self.index, self.columns]

    def _get_dtypes_dict(self):
        """
        Return a cached mapping of column label -> list of dtypes.

        A label maps to several dtypes when it occurs more than once in
        ``self.dtypes``.
        """
        if self._dtypes_dict is None:
            grouped = dict()
            for label, dtype in self.dtypes.items():
                grouped.setdefault(label, []).append(dtype)
            # Publish only once fully built, so a failure above cannot leave
            # a partially filled cache behind.
            self._dtypes_dict = grouped
        return self._dtypes_dict

    def _get_dtypes_by_columns(self, columns: list):
        """Return the dtypes of ``columns`` as one flat list (KeyError if unknown)."""
        dtypes_dict = self._get_dtypes_dict()
        return [dtype for c in columns for dtype in dtypes_dict[c]]

    def _get_columns_by_columns(self, columns: list):
        """Return each label of ``columns`` repeated once per dtype it maps to."""
        dtypes_dict = self._get_dtypes_dict()
        return [c for c in columns for _ in dtypes_dict[c]]
|
|
1576
|
+
|
|
1577
|
+
|
|
1578
|
+
class DataFrameData(_BatchedFetcher, BaseDataFrameData):
    """Data object behind ``DataFrame``: rendering and row/column iteration."""

    type_name = "DataFrame"

    def _to_str(self, representation=False):
        """
        Render this object as text.

        In build mode, or when no session has executed it, only a short
        descriptor is returned. Otherwise the corner data is fetched through
        the most recent executing session and rendered by pandas.

        Parameters
        ----------
        representation : bool, default False
            Use ``repr`` formatting when True, ``str`` formatting otherwise.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return (
                    f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"
                )
            else:
                return f"{self.type_name}(op={type(self._op).__name__})"
        else:
            corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])

            buf = StringIO()
            max_rows = pd.get_option("display.max_rows")

            if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
                buf.write(repr(corner_data) if representation else str(corner_data))
            else:
                # remember we cannot directly call repr(df),
                # because the [... rows x ... columns] may show wrong rows
                with pd.option_context(
                    "display.show_dimensions",
                    False,
                    "display.max_rows",
                    corner_data.shape[0] - 1,
                ):
                    if representation:
                        s = repr(corner_data)
                    else:
                        s = str(corner_data)
                    buf.write(s)
                # Checked outside the context above, which forces the option
                # off; the footer uses this object's own shape.
                if pd.get_option("display.show_dimensions"):
                    n_rows, n_cols = self.shape
                    buf.write(f"\n\n[{n_rows} rows x {n_cols} columns]")

            return buf.getvalue()

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self.to_tensor().astype(dtype=dtype, order=order, copy=False)

    def _repr_html_(self):
        """
        Render this object as HTML.

        Raises
        ------
        NotImplementedError
            If no session has executed this object yet.
        """
        if len(self._executed_sessions) == 0:
            # not executed before, fall back to normal repr
            raise NotImplementedError

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        if corner_data is None:
            return

        buf = StringIO()
        max_rows = pd.get_option("display.max_rows")
        if self.shape[0] <= max_rows:
            buf.write(corner_data._repr_html_())
        else:
            closing_tag = "</div>"
            with pd.option_context(
                "display.show_dimensions",
                False,
                "display.max_rows",
                corner_data.shape[0] - 1,
            ):
                html = corner_data._repr_html_().rstrip()
                # Drop exactly one trailing closing tag so the footer can be
                # placed inside the wrapper. ``str.rstrip("</div>")`` would
                # strip any run of the characters '<', '/', 'd', 'i', 'v',
                # '>' instead of the suffix.
                if html.endswith(closing_tag):
                    html = html[: -len(closing_tag)]
                buf.write(html)
            if pd.get_option("display.show_dimensions"):
                n_rows, n_cols = self.shape
                buf.write(f"<p>{n_rows} rows × {n_cols} columns</p>\n")
            buf.write(closing_tag)

        return buf.getvalue()

    def items(self):
        """Yield ``(column name, self[column name])`` for every entry of the dtypes index."""
        for col_name in self.dtypes.index:
            yield col_name, self[col_name]

    iteritems = items

    def iterrows(self, batch_size=1000, session=None):
        """Yield the ``iterrows()`` output of each batch returned by ``iterbatch``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from getattr(batch_data, "iterrows")()

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """Yield the ``itertuples()`` output of each batch returned by ``iterbatch``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from getattr(batch_data, "itertuples")(index=index, name=name)

    def _need_execution(self):
        # Unknown dtypes is the only condition checked here.
        return self._dtypes is None
|
|
1673
|
+
|
|
1674
|
+
|
|
1675
|
+
class DataFrame(HasShapeTileable, _ToPandasMixin):
    """
    User-facing two-dimensional labelled object.

    Most members forward to the wrapped data object in ``self._data``;
    unknown attribute names fall back to column access.
    """

    __slots__ = ("_cache",)
    _allow_data_type_ = (DataFrameData,)
    type_name = "DataFrame"

    def __len__(self):
        return len(self._data)

    def to_tensor(self, dtype=None):
        """
        Forward to the wrapped data object's ``to_tensor``.

        Parameters
        ----------
        dtype : optional
            Passed through unchanged; ``None`` keeps the previous behaviour.
        """
        return self._data.to_tensor(dtype=dtype)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def __getattr__(self, key):
        try:
            return getattr(self._data, key)
        except AttributeError:
            # Fall back to column access. dtypes may be None (not known
            # yet); in that case keep raising AttributeError so hasattr()
            # and getattr(..., default) behave as expected.
            dtypes = self.dtypes
            if dtypes is not None and key in dtypes:
                return self[key]
            raise

    def __dir__(self):
        result = list(super().__dir__())
        dtypes = self.dtypes
        # No column names can be offered while dtypes are unknown.
        labels = [] if dtypes is None else dtypes.index
        return sorted(
            result + [k for k in labels if isinstance(k, str) and k.isidentifier()]
        )

    def __iter__(self):
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        return iter(self.dtypes.index)

    def __class_getitem__(cls, item):
        """Build a dataframe type descriptor from subscript arguments."""
        return DataFrameType.from_getitem_args(item)

    @property
    def T(self):
        """Shorthand for ``self.transpose()``."""
        return self.transpose()

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """The index (row labels) of the DataFrame."""
        idx = self._data.index
        # Bind the index object to this frame as its axis 0.
        idx._set_df_or_series(self, 0)
        return idx

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def columns(self):
        """The column labels of the DataFrame."""
        col = self._data.columns
        # Bind the columns object to this frame as its axis 1.
        col._set_df_or_series(self, 1)
        return col

    @columns.setter
    def columns(self, new_columns):
        self.set_axis(new_columns, axis=1, inplace=True)

    def keys(self):
        """
        Get the 'info axis' (see Indexing for more).

        This is index for Series, columns for DataFrame.

        Returns
        -------
        Index
            Info axis.
        """
        return self.columns

    @property
    def values(self):
        """Tensor form of this frame, as produced by ``to_tensor()``."""
        return self.to_tensor()

    @property
    def dtypes(self):
        """
        Return the dtypes in the DataFrame.

        This returns a Series with the data type of each column.
        The result's index is the original DataFrame's columns. Columns
        with mixed types are stored with the ``object`` dtype. See
        :ref:`the User Guide <basics.dtypes>` for more.

        Returns
        -------
        pandas.Series
            The data type of each column.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'float': [1.0],
        ...                    'int': [1],
        ...                    'datetime': [md.Timestamp('20180310')],
        ...                    'string': ['foo']})
        >>> df.dtypes
        float              float64
        int                  int64
        datetime    datetime64[ns]
        string              object
        dtype: object
        """
        return self._data.dtypes

    def iterrows(self, batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as (index, Series) pairs.

        Parameters
        ----------
        batch_size : int, default 1000
            Forwarded unchanged to the wrapped data object's ``iterrows``.
        session : optional
            Forwarded unchanged to the wrapped data object's ``iterrows``.

        Yields
        ------
        index : label or tuple of label
            The index of the row. A tuple for a `MultiIndex`.
        data : Series
            The data of the row as a Series.

        it : generator
            A generator that iterates over the rows of the frame.

        See Also
        --------
        DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----

        1. Because ``iterrows`` returns a Series for each row,
           it does **not** preserve dtypes across the rows (dtypes are
           preserved across columns for DataFrames). For example,

           >>> import maxframe.dataframe as md
           >>> df = md.DataFrame([[1, 1.5]], columns=['int', 'float'])
           >>> row = next(df.iterrows())[1]
           >>> row
           int      1.0
           float    1.5
           Name: 0, dtype: float64
           >>> print(row['int'].dtype)
           float64
           >>> print(df['int'].dtype)
           int64

           To preserve dtypes while iterating over the rows, it is better
           to use :meth:`itertuples` which returns namedtuples of the values
           and which is generally faster than ``iterrows``.

        2. You should **never modify** something you are iterating over.
           This is not guaranteed to work in all cases. Depending on the
           data types, the iterator returns a copy and not a view, and writing
           to it will have no effect.
        """
        return self._data.iterrows(batch_size=batch_size, session=session)

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as namedtuples.

        Parameters
        ----------
        index : bool, default True
            If True, return the index as the first element of the tuple.
        name : str or None, default "Pandas"
            The name of the returned namedtuples or None to return regular
            tuples.
        batch_size : int, default 1000
            Forwarded unchanged to the wrapped data object's ``itertuples``.
        session : optional
            Forwarded unchanged to the wrapped data object's ``itertuples``.

        Returns
        -------
        iterator
            An object to iterate over namedtuples for each row in the
            DataFrame with the first field possibly being the index and
            following fields being the column values.

        See Also
        --------
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series)
            pairs.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----
        The column names will be renamed to positional names if they are
        invalid Python identifiers, repeated, or start with an underscore.
        On python versions < 3.7 regular tuples are returned for DataFrames
        with a large number of columns (>254).

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
        ...                   index=['dog', 'hawk'])
        >>> df.execute()
              num_legs  num_wings
        dog          4          0
        hawk         2          2
        >>> for row in df.itertuples():
        ...     print(row)
        ...
        Pandas(Index='dog', num_legs=4, num_wings=0)
        Pandas(Index='hawk', num_legs=2, num_wings=2)

        By setting the `index` parameter to False we can remove the index
        as the first element of the tuple:

        >>> for row in df.itertuples(index=False):
        ...     print(row)
        ...
        Pandas(num_legs=4, num_wings=0)
        Pandas(num_legs=2, num_wings=2)

        With the `name` parameter set we set a custom name for the yielded
        namedtuples:

        >>> for row in df.itertuples(name='Animal'):
        ...     print(row)
        ...
        Animal(Index='dog', num_legs=4, num_wings=0)
        Animal(Index='hawk', num_legs=2, num_wings=2)
        """
        return self._data.itertuples(
            batch_size=batch_size, session=session, index=index, name=name
        )

    def assign(self, **kwargs):
        """
        Assign new columns to a DataFrame.
        Returns a new object with all original columns in addition to new ones.
        Existing columns that are re-assigned will be overwritten.

        Parameters
        ----------
        **kwargs : dict of {str: callable or Series}
            The column names are keywords. If the values are
            callable, they are computed on the DataFrame and
            assigned to the new columns. The callable must not
            change input DataFrame (though pandas doesn't check it).
            If the values are not callable, (e.g. a Series, scalar, or array),
            they are simply assigned.

        Returns
        -------
        DataFrame
            A new DataFrame with the new columns in addition to
            all the existing columns.

        Notes
        -----
        Assigning multiple columns within the same ``assign`` is possible.
        Later items in 'kwargs' may refer to newly created or modified
        columns in 'df'; items are computed and assigned into 'df' in order.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'temp_c': [17.0, 25.0]},
        ...                   index=['Portland', 'Berkeley'])
        >>> df.execute()
                  temp_c
        Portland    17.0
        Berkeley    25.0

        Where the value is a callable, evaluated on `df`:

        >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        Alternatively, the same behavior can be achieved by directly
        referencing an existing Series or sequence:

        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        You can create multiple columns within the same assign where one
        of the columns depends on another one defined within the same assign:

        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9).execute()
                  temp_c  temp_f  temp_k
        Portland    17.0    62.6  290.15
        Berkeley    25.0    77.0  298.15
        """

        data = self.copy()

        # Each value sees the columns assigned before it, because it is
        # evaluated against the progressively updated copy.
        for k, v in kwargs.items():
            data[k] = apply_if_callable(v, data)
        return data
|
|
1995
|
+
|
|
1996
|
+
|
|
1997
|
+
class DataFrameGroupByData(BaseDataFrameData):
    """
    Data object for a grouped DataFrame.

    Adds two fields, ``key_dtypes`` and ``selection``, to the parameters
    inherited from ``BaseDataFrameData``.
    """

    type_name = "DataFrameGroupBy"

    _key_dtypes = SeriesField("key_dtypes")
    _selection = AnyField("selection")

    def __init__(self, key_dtypes=None, selection=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, _selection=selection, **kw)

    @property
    def key_dtypes(self):
        return self._key_dtypes

    @property
    def selection(self):
        return self._selection

    def _get_params(self) -> Dict[str, Any]:
        # Inherited params first, then the two group-by specific entries.
        merged = super()._get_params()
        merged["key_dtypes"] = self.key_dtypes
        merged["selection"] = self.selection
        return merged

    def _set_params(self, new_params: Dict[str, Any]):
        remaining = new_params.copy()
        # Take out the entries owned by this class (ignoring None values);
        # the parent handles whatever is left.
        for field in ("key_dtypes", "selection"):
            value = remaining.pop(field, None)
            if value is not None:
                setattr(self, f"_{field}", value)
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def _equal(self, o):
        # FIXME: a true `==` operator still has to be implemented for
        # DataFrameGroupby; identity is used in build mode meanwhile.
        return (self is o) if is_build_mode() else (self == o)
|
|
2037
|
+
|
|
2038
|
+
|
|
2039
|
+
class SeriesGroupByData(BaseSeriesData):
    """
    Data object for a grouped Series.

    Adds a ``key_dtypes`` field to the parameters inherited from
    ``BaseSeriesData``.
    """

    type_name = "SeriesGroupBy"

    _key_dtypes = AnyField("key_dtypes")

    def __init__(self, key_dtypes=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, **kw)

    @property
    def key_dtypes(self):
        return self._key_dtypes

    def _get_params(self) -> Dict[str, Any]:
        # Inherited params plus the group-by specific entry.
        merged = super()._get_params()
        merged["key_dtypes"] = self.key_dtypes
        return merged

    def _set_params(self, new_params: Dict[str, Any]):
        remaining = new_params.copy()
        # Take out the entry owned by this class (ignoring None); the parent
        # handles whatever is left.
        given_key_dtypes = remaining.pop("key_dtypes", None)
        if given_key_dtypes is not None:
            self._key_dtypes = given_key_dtypes
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def _equal(self, o):
        # FIXME: a true `==` operator still has to be implemented for
        # group-by objects; identity is used in build mode meanwhile.
        return (self is o) if is_build_mode() else (self == o)
|
|
2071
|
+
|
|
2072
|
+
|
|
2073
|
+
class GroupBy(Tileable, _ToPandasMixin):
    """Common base class of the group-by tileables declared in this module."""

    # No per-instance attributes are added at this level.
    __slots__ = ()
|
|
2075
|
+
|
|
2076
|
+
|
|
2077
|
+
class DataFrameGroupBy(GroupBy):
    """Tileable wrapper whose allowed data type is ``DataFrameGroupByData``.

    Attribute access falls back to item access for names found in
    ``self.dtypes``, and ``dir()`` additionally lists those names when they
    are valid identifiers.
    """

    __slots__ = ()
    _allow_data_type_ = (DataFrameGroupByData,)
    type_name = "DataFrameGroupBy"

    def __eq__(self, other):
        """Delegate comparison to ``_equal``."""
        return self._equal(other)

    def __hash__(self):
        # NB: __eq__ is customized above, so __hash__ has to be defined
        # explicitly as well (otherwise Python would set it to None).
        return super().__hash__()

    def __getattr__(self, item):
        """Resolve ``item`` normally, then as a key of ``self.dtypes``.

        The original ``AttributeError`` is re-raised when ``item`` is not
        contained in ``self.dtypes``.
        """
        try:
            return super().__getattr__(item)
        except AttributeError:
            if item not in self.dtypes:
                raise
            return self[item]

    def __dir__(self):
        """Return the sorted default listing plus identifier-like dtype labels."""
        names = list(super().__dir__())
        names.extend(
            label
            for label in self.dtypes.index
            if isinstance(label, str) and label.isidentifier()
        )
        return sorted(names)
|
|
2104
|
+
|
|
2105
|
+
|
|
2106
|
+
class SeriesGroupBy(GroupBy):
    """Tileable wrapper whose allowed data type is ``SeriesGroupByData``."""

    __slots__ = ()
    # presumably the data classes this wrapper may hold -- confirm against Tileable
    _allow_data_type_ = (SeriesGroupByData,)
    type_name = "SeriesGroupBy"

    def __eq__(self, other):
        """Delegate comparison to ``_equal``."""
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()
|
|
2117
|
+
|
|
2118
|
+
|
|
2119
|
+
class CategoricalData(HasShapeTileableData, _ToPandasMixin):
    """Data object behind a ``Categorical`` tileable.

    Declares two optional fields, ``dtype`` and ``categories_value``, exposes
    them (together with ``shape``) through ``params``, and can fill them in
    from its chunks via ``refresh_params``.
    """

    __slots__ = ("_cache",)
    type_name = "Categorical"

    # optional field
    _dtype = DataTypeField("dtype")
    _categories_value = ReferenceField(
        "categories_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        categories_value=None,
        **kw,
    ):
        # Public keyword names are mapped onto the underscore-prefixed fields.
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _categories_value=categories_value,
            **kw,
        )

    @property
    def params(self) -> Dict[str, Any]:
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "categories_value": self.categories_value,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """Assign ``shape``, ``dtype`` and ``categories_value`` from a mapping.

        Entries that are missing or ``None`` leave the current value
        untouched.  The caller's mapping is copied, not mutated.

        Raises:
            TypeError: if the mapping contains any other key.
        """
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        categories_value = params.pop("categories_value", None)
        if categories_value is not None:
            self._categories_value = categories_value
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        if self._dtype is None:
            # Take the dtype from the first chunk when none is stored yet.
            self._dtype = self.chunks[0].dtype
        if self._categories_value is None:
            # Collect every chunk's categories and let pandas derive the
            # combined category index from them.
            categories = []
            for chunk in self.chunks:
                categories.extend(chunk.categories_value.to_pandas())
            self._categories_value = parse_index(
                pd.Categorical(categories).categories, store_data=True
            )

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        # Intentionally a no-op for this data type.
        pass

    def _to_str(self, representation=False):
        """Build the text used by ``__str__`` / ``__repr__``.

        In build mode, or when the object has no executed session, a short
        description of the operator is returned.  Otherwise the data is
        fetched from the most recent executed session and rendered.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return f"{self.type_name} <op={type(self.op).__name__}, key={self.key}>"
            else:
                return f"{self.type_name}(op={type(self.op).__name__})"
        else:
            data = self.fetch(session=self._executed_sessions[-1])
            # Compute repr() once; fall back to str() only if it is empty.
            text = repr(data)
            return text if text else str(data)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def _equal(self, o):
        # FIXME We need to implement a true `==` operator for Categorical.
        # This class routes `__eq__` through `_equal`, so evaluating
        # `self == o` here would call `__eq__` -> `_equal` -> `__eq__` ...
        # and end in RecursionError.  Until real equality exists, compare by
        # identity regardless of build mode.
        return self is o

    @property
    def dtype(self):
        """Stored dtype, falling back to ``self.op.dtype`` when it is unset or falsy."""
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def categories_value(self):
        """Value stored in the ``categories_value`` field."""
        return self._categories_value

    def __eq__(self, other):
        """Delegate comparison to ``_equal``."""
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()
|
|
2226
|
+
|
|
2227
|
+
|
|
2228
|
+
class Categorical(HasShapeTileable, _ToPandasMixin):
    """Tileable wrapper whose allowed data type is ``CategoricalData``."""

    __slots__ = ()
    # presumably the data classes this wrapper may hold -- confirm against HasShapeTileable
    _allow_data_type_ = (CategoricalData,)
    type_name = "Categorical"

    def __len__(self):
        """Return ``len()`` of the wrapped ``_data`` object."""
        return len(self._data)

    def __eq__(self, other):
        """Delegate comparison to ``_equal``."""
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()
|
|
2242
|
+
|
|
2243
|
+
|
|
2244
|
+
class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Data object for a result that is either a DataFrame or a Series.

    The concrete kind is kept in ``data_type`` (the code below uses the
    strings ``"dataframe"`` and ``"series"``), and the kind-specific metadata
    lives in the ``data_params`` dict.  Unknown attributes are looked up in
    that dict.
    """

    __slots__ = ()

    _data_type = StringField("data_type")
    _data_params = DictField("data_params")

    def __init__(
        self,
        op=None,
        data_type=None,
        data_params=None,
        **kw,
    ):
        # Both fields are assigned before the parent constructor runs.
        self._data_type = data_type
        self._data_params = data_params or dict()
        super().__init__(_op=op, **kw)

    def __getattr__(self, item):
        """Fallback lookup: serve ``item`` from ``data_params`` if present."""
        stored = self._data_params
        if item not in stored:
            raise AttributeError(f"'{type(self)}' object has no attribute '{item}'")
        return stored[item]

    @property
    def shape(self):
        """``data_params["shape"]`` or ``None`` when absent."""
        return self._data_params.get("shape")

    @property
    def nsplits(self):
        """``data_params["nsplits"]`` or ``None`` when absent."""
        return self._data_params.get("nsplits")

    @property
    def data_type(self):
        """Stored kind marker."""
        return self._data_type

    @property
    def data_params(self):
        """The dict holding the kind-specific metadata."""
        return self._data_params

    @property
    def params(self) -> Dict[str, Any]:
        """Return ``data_type`` and ``data_params`` as a two-entry dict."""
        return {"data_type": self._data_type, "data_params": self._data_params}

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        # After execution a DataFrameFetch is created, while the data that
        # belongs to the original key is still of DataFrameOrSeries type.
        # When such an object is restored there is no "data_type" entry in
        # the params, so the kind is inferred: a "dtype" key means series,
        # anything else means dataframe.
        if "data_type" in new_params:
            self._data_type = new_params.get("data_type")
            self._data_params = dict(new_params.get("data_params", {}).items())
            return
        self._data_type = "series" if "dtype" in new_params else "dataframe"
        self._data_params = new_params.copy()

    def refresh_params(self):
        """Recompute ``data_type`` and ``data_params`` from the chunks."""
        # Index values are taken from 1-d chunks and, for 2-d chunks, from
        # those whose second index component is 0.
        leading_index_values = {
            chunk.index: chunk.index_value
            for chunk in self.chunks
            if chunk.ndim == 1 or chunk.index[1] == 0
        }
        index_value = merge_index_value(leading_index_values, store_data=False)
        nsplits = calc_nsplits({c.index: c.shape for c in self.chunks})
        shape = tuple(sum(ns) for ns in nsplits)

        refreshed = {
            "nsplits": nsplits,
            "shape": shape,
            "index_value": index_value,
        }

        self._data_type = self._chunks[0]._data_type
        if self.data_type == "dataframe":
            # Concatenate the dtypes of the chunks whose first index
            # component is 0.
            dtypes = pd.concat(
                [c.dtypes_value.value for c in self.chunks if c.index[0] == 0]
            )
            refreshed["dtypes"] = dtypes
            refreshed["columns_value"] = parse_index(dtypes.index, store_data=True)
            refreshed["dtypes_value"] = DtypesValue(key=tokenize(dtypes), value=dtypes)
        else:
            refreshed["dtype"] = self.chunks[0].dtype
            refreshed["name"] = self.chunks[0].name
        self._data_params.update(refreshed)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        # Intentionally a no-op for this data type.
        pass

    def ensure_data(self):
        """Execute this object and return a fetch tileable of the concrete kind.

        The object is executed, detached from the default session, and a new
        tileable built by ``DataFrameFetch`` (same key, chunks and nsplits of
        the session's fetch tileable, plus ``data_params``) is attached to
        that session and returned.
        """
        from .fetch.core import DataFrameFetch

        self.execute()
        default_sess = get_default_session()
        self._detach_session(default_sess._session)

        fetch_tileable = default_sess._session._tileable_to_fetch[self]
        fetch_op = DataFrameFetch(output_types=[getattr(OutputType, self.data_type)])
        new = fetch_op.new_tileable(
            [],
            _key=self.key,
            chunks=fetch_tileable.chunks,
            nsplits=fetch_tileable.nsplits,
            **self.data_params,
        )
        new._attach_session(default_sess._session)
        return new
|
|
2360
|
+
|
|
2361
|
+
|
|
2362
|
+
class DataFrameOrSeries(HasShapeTileable, _ToPandasMixin):
    """Tileable wrapper whose allowed data type is ``DataFrameOrSeriesData``."""

    __slots__ = ()
    # presumably the data classes this wrapper may hold -- confirm against HasShapeTileable
    _allow_data_type_ = (DataFrameOrSeriesData,)
    type_name = "DataFrameOrSeries"
|
|
2366
|
+
|
|
2367
|
+
|
|
2368
|
+
# Each tuple pairs a tileable wrapper class with its data class.
INDEX_TYPE = (Index, IndexData)
SERIES_TYPE = (Series, SeriesData)
DATAFRAME_OR_SERIES_TYPE = (DataFrameOrSeries, DataFrameOrSeriesData)
DATAFRAME_TYPE = (DataFrame, DataFrameData)
DATAFRAME_GROUPBY_TYPE = (DataFrameGroupBy, DataFrameGroupByData)
SERIES_GROUPBY_TYPE = (SeriesGroupBy, SeriesGroupByData)
# The GroupBy base class plus both concrete group-by pairs.
GROUPBY_TYPE = (GroupBy,) + DATAFRAME_GROUPBY_TYPE + SERIES_GROUPBY_TYPE
CATEGORICAL_TYPE = (Categorical, CategoricalData)
# NOTE(review): DATAFRAME_OR_SERIES_TYPE is not part of this union -- confirm
# that the omission is intentional.
TILEABLE_TYPE = (
    INDEX_TYPE + SERIES_TYPE + DATAFRAME_TYPE + GROUPBY_TYPE + CATEGORICAL_TYPE
)

# Associate every OutputType member with its wrapper/data class pair.
register_output_types(OutputType.dataframe, DATAFRAME_TYPE)
register_output_types(OutputType.series, SERIES_TYPE)
register_output_types(OutputType.df_or_series, DATAFRAME_OR_SERIES_TYPE)
register_output_types(OutputType.index, INDEX_TYPE)
register_output_types(OutputType.categorical, CATEGORICAL_TYPE)
register_output_types(OutputType.dataframe_groupby, DATAFRAME_GROUPBY_TYPE)
register_output_types(OutputType.series_groupby, SERIES_GROUPBY_TYPE)
|