maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from numbers import Integral
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from ... import opcodes
|
|
22
|
+
from ...core import ENTITY_TYPE, EntityData, ExecutableTuple, OutputType
|
|
23
|
+
from ...serialization.serializables import (
|
|
24
|
+
AnyField,
|
|
25
|
+
BoolField,
|
|
26
|
+
Int32Field,
|
|
27
|
+
KeyField,
|
|
28
|
+
StringField,
|
|
29
|
+
)
|
|
30
|
+
from ...tensor import tensor as astensor
|
|
31
|
+
from ...tensor.core import TensorOrder
|
|
32
|
+
from ..core import INDEX_TYPE, SERIES_TYPE
|
|
33
|
+
from ..initializer import Series as asseries
|
|
34
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
35
|
+
from ..utils import parse_index
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class DataFrameCut(DataFrameOperator, DataFrameOperatorMixin):
|
|
39
|
+
_op_type_ = opcodes.CUT
|
|
40
|
+
|
|
41
|
+
_input = KeyField("input")
|
|
42
|
+
bins = AnyField("bins", default=None)
|
|
43
|
+
right = BoolField("right", default=None)
|
|
44
|
+
labels = AnyField("labels", default=None)
|
|
45
|
+
retbins = BoolField("retbins", default=None)
|
|
46
|
+
precision = Int32Field("precision", default=None)
|
|
47
|
+
include_lowest = BoolField("include_lowest", default=None)
|
|
48
|
+
duplicates = StringField("duplicates", default=None)
|
|
49
|
+
ordered = BoolField("ordered", default=None)
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def input(self):
|
|
53
|
+
return self._input
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def output_limit(self):
|
|
57
|
+
return 1 if not self.retbins else 2
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def _set_inputs(cls, op: "DataFrameCut", inputs: List[EntityData]):
|
|
61
|
+
super()._set_inputs(op, inputs)
|
|
62
|
+
inputs_iter = iter(op._inputs)
|
|
63
|
+
op._input = next(inputs_iter)
|
|
64
|
+
if isinstance(op.bins, ENTITY_TYPE):
|
|
65
|
+
op.bins = next(inputs_iter)
|
|
66
|
+
if isinstance(op.labels, ENTITY_TYPE):
|
|
67
|
+
op.labels = next(inputs_iter)
|
|
68
|
+
|
|
69
|
+
def __call__(self, x):
|
|
70
|
+
if isinstance(x, pd.Series):
|
|
71
|
+
x = asseries(x)
|
|
72
|
+
elif not isinstance(x, ENTITY_TYPE):
|
|
73
|
+
x = astensor(x)
|
|
74
|
+
if x.ndim != 1:
|
|
75
|
+
raise ValueError("Input array must be 1 dimensional")
|
|
76
|
+
if x.size == 0:
|
|
77
|
+
raise ValueError("Cannot cut empty array")
|
|
78
|
+
|
|
79
|
+
inputs = [x]
|
|
80
|
+
if self.labels is not None and not isinstance(self.labels, (bool, ENTITY_TYPE)):
|
|
81
|
+
self.labels = np.asarray(self.labels)
|
|
82
|
+
# infer dtype
|
|
83
|
+
x_empty = (
|
|
84
|
+
pd.Series([1], dtype=x.dtype)
|
|
85
|
+
if isinstance(x, SERIES_TYPE)
|
|
86
|
+
else np.asarray([1], dtype=x.dtype)
|
|
87
|
+
)
|
|
88
|
+
if isinstance(self.bins, INDEX_TYPE):
|
|
89
|
+
bins = self.bins.index_value.to_pandas()
|
|
90
|
+
inputs.append(self.bins)
|
|
91
|
+
bins_unknown = True
|
|
92
|
+
elif isinstance(self.bins, ENTITY_TYPE):
|
|
93
|
+
bins = np.asarray([2], dtype=self.bins.dtype)
|
|
94
|
+
inputs.append(self.bins)
|
|
95
|
+
bins_unknown = True
|
|
96
|
+
else:
|
|
97
|
+
bins = self.bins
|
|
98
|
+
bins_unknown = isinstance(self.bins, Integral)
|
|
99
|
+
if isinstance(self.labels, ENTITY_TYPE):
|
|
100
|
+
bins_unknown = True
|
|
101
|
+
labels = None
|
|
102
|
+
inputs.append(self.labels)
|
|
103
|
+
else:
|
|
104
|
+
if self.labels is False or not bins_unknown:
|
|
105
|
+
labels = self.labels
|
|
106
|
+
else:
|
|
107
|
+
labels = None
|
|
108
|
+
ret = pd.cut(
|
|
109
|
+
x_empty,
|
|
110
|
+
bins,
|
|
111
|
+
right=self.right,
|
|
112
|
+
labels=labels,
|
|
113
|
+
retbins=True,
|
|
114
|
+
include_lowest=self.include_lowest,
|
|
115
|
+
duplicates=self.duplicates,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
kws = []
|
|
119
|
+
output_types = []
|
|
120
|
+
if bins_unknown and isinstance(ret[0].dtype, pd.CategoricalDtype):
|
|
121
|
+
# inaccurate dtype, just create an empty one
|
|
122
|
+
out_dtype = pd.CategoricalDtype()
|
|
123
|
+
else:
|
|
124
|
+
out_dtype = ret[0].dtype
|
|
125
|
+
if isinstance(ret[0], pd.Series):
|
|
126
|
+
output_types.append(OutputType.series)
|
|
127
|
+
kws.append(
|
|
128
|
+
{
|
|
129
|
+
"dtype": out_dtype,
|
|
130
|
+
"shape": x.shape,
|
|
131
|
+
"index_value": x.index_value,
|
|
132
|
+
"name": x.name,
|
|
133
|
+
}
|
|
134
|
+
)
|
|
135
|
+
elif isinstance(ret[0], np.ndarray):
|
|
136
|
+
output_types.append(OutputType.tensor)
|
|
137
|
+
kws.append(
|
|
138
|
+
{"dtype": out_dtype, "shape": x.shape, "order": TensorOrder.C_ORDER}
|
|
139
|
+
)
|
|
140
|
+
else:
|
|
141
|
+
assert isinstance(ret[0], pd.Categorical)
|
|
142
|
+
output_types.append(OutputType.categorical)
|
|
143
|
+
kws.append(
|
|
144
|
+
{
|
|
145
|
+
"dtype": out_dtype,
|
|
146
|
+
"shape": x.shape,
|
|
147
|
+
"categories_value": parse_index(
|
|
148
|
+
out_dtype.categories, store_data=True
|
|
149
|
+
),
|
|
150
|
+
}
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
if self.retbins:
|
|
154
|
+
if isinstance(self.bins, (pd.IntervalIndex, INDEX_TYPE)):
|
|
155
|
+
output_types.append(OutputType.index)
|
|
156
|
+
kws.append(
|
|
157
|
+
{
|
|
158
|
+
"dtype": self.bins.dtype,
|
|
159
|
+
"shape": self.bins.shape,
|
|
160
|
+
"index_value": (
|
|
161
|
+
self.bins.index_value
|
|
162
|
+
if isinstance(self.bins, INDEX_TYPE)
|
|
163
|
+
else parse_index(self.bins, store_data=False)
|
|
164
|
+
),
|
|
165
|
+
"name": self.bins.name,
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
else:
|
|
169
|
+
output_types.append(OutputType.tensor)
|
|
170
|
+
kws.append(
|
|
171
|
+
{
|
|
172
|
+
"dtype": ret[1].dtype,
|
|
173
|
+
"shape": ret[1].shape if ret[1].size > 0 else (np.nan,),
|
|
174
|
+
"order": TensorOrder.C_ORDER,
|
|
175
|
+
}
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
self.output_types = output_types
|
|
179
|
+
return ExecutableTuple(self.new_tileables(inputs, kws=kws))
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def cut(
|
|
183
|
+
x,
|
|
184
|
+
bins,
|
|
185
|
+
right: bool = True,
|
|
186
|
+
labels=None,
|
|
187
|
+
retbins: bool = False,
|
|
188
|
+
precision: int = 3,
|
|
189
|
+
include_lowest: bool = False,
|
|
190
|
+
duplicates: str = "raise",
|
|
191
|
+
ordered: bool = True,
|
|
192
|
+
):
|
|
193
|
+
"""
|
|
194
|
+
Bin values into discrete intervals.
|
|
195
|
+
|
|
196
|
+
Use `cut` when you need to segment and sort data values into bins. This
|
|
197
|
+
function is also useful for going from a continuous variable to a
|
|
198
|
+
categorical variable. For example, `cut` could convert ages to groups of
|
|
199
|
+
age ranges. Supports binning into an equal number of bins, or a
|
|
200
|
+
pre-specified array of bins.
|
|
201
|
+
|
|
202
|
+
Parameters
|
|
203
|
+
----------
|
|
204
|
+
x : array-like
|
|
205
|
+
The input array to be binned. Must be 1-dimensional.
|
|
206
|
+
bins : int, sequence of scalars, or IntervalIndex
|
|
207
|
+
The criteria to bin by.
|
|
208
|
+
|
|
209
|
+
* int : Defines the number of equal-width bins in the range of `x`. The
|
|
210
|
+
range of `x` is extended by .1% on each side to include the minimum
|
|
211
|
+
and maximum values of `x`.
|
|
212
|
+
* sequence of scalars : Defines the bin edges allowing for non-uniform
|
|
213
|
+
width. No extension of the range of `x` is done.
|
|
214
|
+
* IntervalIndex : Defines the exact bins to be used. Note that
|
|
215
|
+
IntervalIndex for `bins` must be non-overlapping.
|
|
216
|
+
|
|
217
|
+
right : bool, default True
|
|
218
|
+
Indicates whether `bins` includes the rightmost edge or not. If
|
|
219
|
+
``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
|
|
220
|
+
indicate (1,2], (2,3], (3,4]. This argument is ignored when
|
|
221
|
+
`bins` is an IntervalIndex.
|
|
222
|
+
labels : array or False, default None
|
|
223
|
+
Specifies the labels for the returned bins. Must be the same length as
|
|
224
|
+
the resulting bins. If False, returns only integer indicators of the
|
|
225
|
+
bins. This affects the type of the output container (see below).
|
|
226
|
+
This argument is ignored when `bins` is an IntervalIndex. If True,
|
|
227
|
+
raises an error.
|
|
228
|
+
retbins : bool, default False
|
|
229
|
+
Whether to return the bins or not. Useful when bins is provided
|
|
230
|
+
as a scalar.
|
|
231
|
+
precision : int, default 3
|
|
232
|
+
The precision at which to store and display the bins labels.
|
|
233
|
+
include_lowest : bool, default False
|
|
234
|
+
Whether the first interval should be left-inclusive or not.
|
|
235
|
+
duplicates : {default 'raise', 'drop'}, optional
|
|
236
|
+
If bin edges are not unique, raise ValueError or drop non-uniques.
|
|
237
|
+
ordered : bool, default True
|
|
238
|
+
Whether the labels are ordered or not. Applies to returned types
|
|
239
|
+
Categorical and Series (with Categorical dtype). If True, the resulting
|
|
240
|
+
categorical will be ordered. If False, the resulting categorical will be
|
|
241
|
+
unordered (labels must be provided).
|
|
242
|
+
|
|
243
|
+
Returns
|
|
244
|
+
-------
|
|
245
|
+
out : Categorical, Series, or Tensor
|
|
246
|
+
An array-like object representing the respective bin for each value
|
|
247
|
+
of `x`. The type depends on the value of `labels`.
|
|
248
|
+
|
|
249
|
+
* True (default) : returns a Series for Series `x` or a
|
|
250
|
+
Categorical for all other inputs. The values stored within
|
|
251
|
+
are Interval dtype.
|
|
252
|
+
|
|
253
|
+
* sequence of scalars : returns a Series for Series `x` or a
|
|
254
|
+
Categorical for all other inputs. The values stored within
|
|
255
|
+
are whatever the type in the sequence is.
|
|
256
|
+
|
|
257
|
+
* False : returns a tensor of integers.
|
|
258
|
+
|
|
259
|
+
bins : Tensor or IntervalIndex.
|
|
260
|
+
The computed or specified bins. Only returned when `retbins=True`.
|
|
261
|
+
For scalar or sequence `bins`, this is a tensor with the computed
|
|
262
|
+
bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
|
|
263
|
+
an IntervalIndex `bins`, this is equal to `bins`.
|
|
264
|
+
|
|
265
|
+
See Also
|
|
266
|
+
--------
|
|
267
|
+
qcut : Discretize variable into equal-sized buckets based on rank
|
|
268
|
+
or based on sample quantiles.
|
|
269
|
+
Categorical : Array type for storing data that come from a
|
|
270
|
+
fixed set of values.
|
|
271
|
+
Series : One-dimensional array with axis labels (including time series).
|
|
272
|
+
IntervalIndex : Immutable Index implementing an ordered, sliceable set.
|
|
273
|
+
|
|
274
|
+
Notes
|
|
275
|
+
-----
|
|
276
|
+
Any NA values will be NA in the result. Out of bounds values will be NA in
|
|
277
|
+
the resulting Series or Categorical object.
|
|
278
|
+
|
|
279
|
+
Examples
|
|
280
|
+
--------
|
|
281
|
+
Discretize into three equal-sized bins.
|
|
282
|
+
|
|
283
|
+
>>> import maxframe.tensor as mt
|
|
284
|
+
>>> import maxframe.dataframe as md
|
|
285
|
+
|
|
286
|
+
>>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3).execute()
|
|
287
|
+
... # doctest: +ELLIPSIS
|
|
288
|
+
[(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
|
|
289
|
+
Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
|
|
290
|
+
|
|
291
|
+
>>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3, retbins=True).execute()
|
|
292
|
+
... # doctest: +ELLIPSIS
|
|
293
|
+
([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
|
|
294
|
+
Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
|
|
295
|
+
array([0.994, 3. , 5. , 7. ]))
|
|
296
|
+
|
|
297
|
+
Discovers the same bins, but assign them specific labels. Notice that
|
|
298
|
+
the returned Categorical's categories are `labels` and is ordered.
|
|
299
|
+
|
|
300
|
+
>>> md.cut(mt.array([1, 7, 5, 4, 6, 3]),
|
|
301
|
+
... 3, labels=["bad", "medium", "good"]).execute()
|
|
302
|
+
[bad, good, medium, medium, good, bad]
|
|
303
|
+
Categories (3, object): [bad < medium < good]
|
|
304
|
+
|
|
305
|
+
ordered=False will result in unordered categories when labels are passed. This parameter
|
|
306
|
+
can be used to allow non-unique labels:
|
|
307
|
+
|
|
308
|
+
>>> md.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
|
|
309
|
+
... labels=["B", "A", "B"], ordered=False).execute()
|
|
310
|
+
['B', 'B', 'A', 'A', 'B', 'B']
|
|
311
|
+
Categories (2, object): ['A', 'B']
|
|
312
|
+
|
|
313
|
+
``labels=False`` implies you just want the bins back.
|
|
314
|
+
|
|
315
|
+
>>> md.cut([0, 1, 1, 2], bins=4, labels=False).execute()
|
|
316
|
+
array([0, 1, 1, 3])
|
|
317
|
+
|
|
318
|
+
Passing a Series as an input returns a Series with categorical dtype:
|
|
319
|
+
|
|
320
|
+
>>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
|
|
321
|
+
... index=['a', 'b', 'c', 'd', 'e'])
|
|
322
|
+
>>> md.cut(s, 3).execute()
|
|
323
|
+
... # doctest: +ELLIPSIS
|
|
324
|
+
a (1.992, 4.667]
|
|
325
|
+
b (1.992, 4.667]
|
|
326
|
+
c (4.667, 7.333]
|
|
327
|
+
d (7.333, 10.0]
|
|
328
|
+
e (7.333, 10.0]
|
|
329
|
+
dtype: category
|
|
330
|
+
Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...
|
|
331
|
+
|
|
332
|
+
Passing a Series as an input returns a Series with mapping value.
|
|
333
|
+
It is used to map numerically to intervals based on bins.
|
|
334
|
+
|
|
335
|
+
>>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
|
|
336
|
+
... index=['a', 'b', 'c', 'd', 'e'])
|
|
337
|
+
>>> md.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False).execute()
|
|
338
|
+
... # doctest: +ELLIPSIS
|
|
339
|
+
(a 0.0
|
|
340
|
+
b 1.0
|
|
341
|
+
c 2.0
|
|
342
|
+
d 3.0
|
|
343
|
+
e NaN
|
|
344
|
+
dtype: float64, array([0, 2, 4, 6, 8, 10]))
|
|
345
|
+
|
|
346
|
+
Use `drop` optional when bins is not unique
|
|
347
|
+
|
|
348
|
+
>>> md.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
|
|
349
|
+
... right=False, duplicates='drop').execute()
|
|
350
|
+
... # doctest: +ELLIPSIS
|
|
351
|
+
(a 0.0
|
|
352
|
+
b 1.0
|
|
353
|
+
c 2.0
|
|
354
|
+
d 3.0
|
|
355
|
+
e NaN
|
|
356
|
+
dtype: float64, array([0, 2, 4, 6, 10]))
|
|
357
|
+
|
|
358
|
+
Passing an IntervalIndex for `bins` results in those categories exactly.
|
|
359
|
+
Notice that values not covered by the IntervalIndex are set to NaN. 0
|
|
360
|
+
is to the left of the first bin (which is closed on the right), and 1.5
|
|
361
|
+
falls between two bins.
|
|
362
|
+
|
|
363
|
+
>>> bins = md.Index(pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]))
|
|
364
|
+
>>> md.cut([0, 0.5, 1.5, 2.5, 4.5], bins).execute()
|
|
365
|
+
[NaN, (0, 1], NaN, (2, 3], (4, 5]]
|
|
366
|
+
Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
|
|
367
|
+
"""
|
|
368
|
+
|
|
369
|
+
if isinstance(bins, Integral) and bins < 1:
|
|
370
|
+
raise ValueError("`bins` should be a positive integer")
|
|
371
|
+
|
|
372
|
+
op = DataFrameCut(
|
|
373
|
+
bins=bins,
|
|
374
|
+
right=right,
|
|
375
|
+
labels=labels,
|
|
376
|
+
retbins=retbins,
|
|
377
|
+
precision=precision,
|
|
378
|
+
include_lowest=include_lowest,
|
|
379
|
+
duplicates=duplicates,
|
|
380
|
+
ordered=ordered,
|
|
381
|
+
)
|
|
382
|
+
ret = op(x)
|
|
383
|
+
if not retbins:
|
|
384
|
+
return ret[0]
|
|
385
|
+
else:
|
|
386
|
+
return ret
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import EntityData
|
|
22
|
+
from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
|
|
23
|
+
from ..core import SERIES_TYPE
|
|
24
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
|
+
from ..utils import build_df, parse_index
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
29
|
+
_op_type_ = opcodes.DESCRIBE
|
|
30
|
+
|
|
31
|
+
input = KeyField("input", default=None)
|
|
32
|
+
percentiles = ListField("percentiles", FieldTypes.float64, default=None)
|
|
33
|
+
include = AnyField("include", default=None)
|
|
34
|
+
exclude = AnyField("exclude", default=None)
|
|
35
|
+
|
|
36
|
+
def __init__(self, output_types=None, **kw):
|
|
37
|
+
super().__init__(_output_types=output_types, **kw)
|
|
38
|
+
|
|
39
|
+
@classmethod
|
|
40
|
+
def _set_inputs(cls, op: "DataFrameDescribe", inputs: List[EntityData]):
|
|
41
|
+
super()._set_inputs(op, inputs)
|
|
42
|
+
op.input = op._inputs[0]
|
|
43
|
+
|
|
44
|
+
def __call__(self, df_or_series):
|
|
45
|
+
if isinstance(df_or_series, SERIES_TYPE):
|
|
46
|
+
test_series = pd.Series([], dtype=df_or_series.dtype).describe(
|
|
47
|
+
percentiles=self.percentiles,
|
|
48
|
+
include=self.include,
|
|
49
|
+
exclude=self.exclude,
|
|
50
|
+
)
|
|
51
|
+
return self.new_series(
|
|
52
|
+
[df_or_series],
|
|
53
|
+
shape=(len(test_series),),
|
|
54
|
+
dtype=test_series.dtype,
|
|
55
|
+
index_value=parse_index(test_series.index, store_data=True),
|
|
56
|
+
)
|
|
57
|
+
else:
|
|
58
|
+
test_inp_df = build_df(df_or_series)
|
|
59
|
+
test_df = test_inp_df.describe(
|
|
60
|
+
percentiles=self.percentiles,
|
|
61
|
+
include=self.include,
|
|
62
|
+
exclude=self.exclude,
|
|
63
|
+
)
|
|
64
|
+
if len(self.percentiles) == 0:
|
|
65
|
+
# specify percentiles=False
|
|
66
|
+
# Note: unlike pandas that False is illegal value for percentiles,
|
|
67
|
+
# MaxFrame DataFrame allows user to specify percentiles=False
|
|
68
|
+
# to skip computation about percentiles
|
|
69
|
+
test_df.drop(["50%"], axis=0, inplace=True)
|
|
70
|
+
return self.new_dataframe(
|
|
71
|
+
[df_or_series],
|
|
72
|
+
shape=test_df.shape,
|
|
73
|
+
dtypes=test_df.dtypes,
|
|
74
|
+
index_value=parse_index(test_df.index, store_data=True),
|
|
75
|
+
columns_value=parse_index(test_df.columns, store_data=True),
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def describe(df_or_series, percentiles=None, include=None, exclude=None):
|
|
80
|
+
"""
|
|
81
|
+
Generate descriptive statistics.
|
|
82
|
+
|
|
83
|
+
Descriptive statistics include those that summarize the central
|
|
84
|
+
tendency, dispersion and shape of a
|
|
85
|
+
dataset's distribution, excluding ``NaN`` values.
|
|
86
|
+
|
|
87
|
+
Analyzes both numeric and object series, as well
|
|
88
|
+
as ``DataFrame`` column sets of mixed data types. The output
|
|
89
|
+
will vary depending on what is provided. Refer to the notes
|
|
90
|
+
below for more detail.
|
|
91
|
+
|
|
92
|
+
Parameters
|
|
93
|
+
----------
|
|
94
|
+
percentiles : list-like of numbers, optional
|
|
95
|
+
The percentiles to include in the output. All should
|
|
96
|
+
fall between 0 and 1. The default is
|
|
97
|
+
``[.25, .5, .75]``, which returns the 25th, 50th, and
|
|
98
|
+
75th percentiles.
|
|
99
|
+
include : 'all', list-like of dtypes or None (default), optional
|
|
100
|
+
A white list of data types to include in the result. Ignored
|
|
101
|
+
for ``Series``. Here are the options:
|
|
102
|
+
|
|
103
|
+
- 'all' : All columns of the input will be included in the output.
|
|
104
|
+
- A list-like of dtypes : Limits the results to the
|
|
105
|
+
provided data types.
|
|
106
|
+
To limit the result to numeric types submit
|
|
107
|
+
``numpy.number``. To limit it instead to object columns submit
|
|
108
|
+
the ``numpy.object`` data type. Strings
|
|
109
|
+
can also be used in the style of
|
|
110
|
+
``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
|
|
111
|
+
- None (default) : The result will include all numeric columns.
|
|
112
|
+
exclude : list-like of dtypes or None (default), optional,
|
|
113
|
+
A black list of data types to omit from the result. Ignored
|
|
114
|
+
for ``Series``. Here are the options:
|
|
115
|
+
|
|
116
|
+
- A list-like of dtypes : Excludes the provided data types
|
|
117
|
+
from the result. To exclude numeric types submit
|
|
118
|
+
``numpy.number``. To exclude object columns submit the data
|
|
119
|
+
type ``numpy.object``. Strings can also be used in the style of
|
|
120
|
+
``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
|
|
121
|
+
- None (default) : The result will exclude nothing.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
Series or DataFrame
|
|
126
|
+
Summary statistics of the Series or Dataframe provided.
|
|
127
|
+
|
|
128
|
+
See Also
|
|
129
|
+
--------
|
|
130
|
+
DataFrame.count: Count number of non-NA/null observations.
|
|
131
|
+
DataFrame.max: Maximum of the values in the object.
|
|
132
|
+
DataFrame.min: Minimum of the values in the object.
|
|
133
|
+
DataFrame.mean: Mean of the values.
|
|
134
|
+
DataFrame.std: Standard deviation of the observations.
|
|
135
|
+
DataFrame.select_dtypes: Subset of a DataFrame including/excluding
|
|
136
|
+
columns based on their dtype.
|
|
137
|
+
|
|
138
|
+
Notes
|
|
139
|
+
-----
|
|
140
|
+
For numeric data, the result's index will include ``count``,
|
|
141
|
+
``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
|
|
142
|
+
upper percentiles. By default the lower percentile is ``25`` and the
|
|
143
|
+
upper percentile is ``75``. The ``50`` percentile is the
|
|
144
|
+
same as the median.
|
|
145
|
+
|
|
146
|
+
For object data (e.g. strings or timestamps), the result's index
|
|
147
|
+
will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
|
|
148
|
+
is the most common value. The ``freq`` is the most common value's
|
|
149
|
+
frequency. Timestamps also include the ``first`` and ``last`` items.
|
|
150
|
+
|
|
151
|
+
If multiple object values have the highest count, then the
|
|
152
|
+
``count`` and ``top`` results will be arbitrarily chosen from
|
|
153
|
+
among those with the highest count.
|
|
154
|
+
|
|
155
|
+
For mixed data types provided via a ``DataFrame``, the default is to
|
|
156
|
+
return only an analysis of numeric columns. If the dataframe consists
|
|
157
|
+
only of object data without any numeric columns, the default is to
|
|
158
|
+
return an analysis of object columns. If ``include='all'`` is provided
|
|
159
|
+
as an option, the result will include a union of attributes of each type.
|
|
160
|
+
|
|
161
|
+
The `include` and `exclude` parameters can be used to limit
|
|
162
|
+
which columns in a ``DataFrame`` are analyzed for the output.
|
|
163
|
+
The parameters are ignored when analyzing a ``Series``.
|
|
164
|
+
|
|
165
|
+
Examples
|
|
166
|
+
--------
|
|
167
|
+
Describing a numeric ``Series``.
|
|
168
|
+
|
|
169
|
+
>>> import maxframe.tensor as mt
|
|
170
|
+
>>> import maxframe.dataframe as md
|
|
171
|
+
>>> s = md.Series([1, 2, 3])
|
|
172
|
+
>>> s.describe().execute()
|
|
173
|
+
count 3.0
|
|
174
|
+
mean 2.0
|
|
175
|
+
std 1.0
|
|
176
|
+
min 1.0
|
|
177
|
+
25% 1.5
|
|
178
|
+
50% 2.0
|
|
179
|
+
75% 2.5
|
|
180
|
+
max 3.0
|
|
181
|
+
dtype: float64
|
|
182
|
+
|
|
183
|
+
Describing a ``DataFrame``. By default only numeric fields
|
|
184
|
+
are returned.
|
|
185
|
+
|
|
186
|
+
>>> df = md.DataFrame({'numeric': [1, 2, 3],
|
|
187
|
+
... 'object': ['a', 'b', 'c']
|
|
188
|
+
... })
|
|
189
|
+
>>> df.describe().execute()
|
|
190
|
+
numeric
|
|
191
|
+
count 3.0
|
|
192
|
+
mean 2.0
|
|
193
|
+
std 1.0
|
|
194
|
+
min 1.0
|
|
195
|
+
25% 1.5
|
|
196
|
+
50% 2.0
|
|
197
|
+
75% 2.5
|
|
198
|
+
max 3.0
|
|
199
|
+
|
|
200
|
+
Describing all columns of a ``DataFrame`` regardless of data type.
|
|
201
|
+
|
|
202
|
+
>>> df.describe(include='all').execute() # doctest: +SKIP.execute()
|
|
203
|
+
numeric object
|
|
204
|
+
count 3.0 3
|
|
205
|
+
unique NaN 3
|
|
206
|
+
top NaN a
|
|
207
|
+
freq NaN 1
|
|
208
|
+
mean 2.0 NaN
|
|
209
|
+
std 1.0 NaN
|
|
210
|
+
min 1.0 NaN
|
|
211
|
+
25% 1.5 NaN
|
|
212
|
+
50% 2.0 NaN
|
|
213
|
+
75% 2.5 NaN
|
|
214
|
+
max 3.0 NaN
|
|
215
|
+
|
|
216
|
+
Describing a column from a ``DataFrame`` by accessing it as
|
|
217
|
+
an attribute.
|
|
218
|
+
|
|
219
|
+
>>> df.numeric.describe().execute()
|
|
220
|
+
count 3.0
|
|
221
|
+
mean 2.0
|
|
222
|
+
std 1.0
|
|
223
|
+
min 1.0
|
|
224
|
+
25% 1.5
|
|
225
|
+
50% 2.0
|
|
226
|
+
75% 2.5
|
|
227
|
+
max 3.0
|
|
228
|
+
Name: numeric, dtype: float64
|
|
229
|
+
|
|
230
|
+
Including only numeric columns in a ``DataFrame`` description.
|
|
231
|
+
|
|
232
|
+
>>> df.describe(include=[mt.number]).execute()
|
|
233
|
+
numeric
|
|
234
|
+
count 3.0
|
|
235
|
+
mean 2.0
|
|
236
|
+
std 1.0
|
|
237
|
+
min 1.0
|
|
238
|
+
25% 1.5
|
|
239
|
+
50% 2.0
|
|
240
|
+
75% 2.5
|
|
241
|
+
max 3.0
|
|
242
|
+
|
|
243
|
+
Including only string columns in a ``DataFrame`` description.
|
|
244
|
+
|
|
245
|
+
>>> df.describe(include=[object]).execute() # doctest: +SKIP.execute()
|
|
246
|
+
object
|
|
247
|
+
count 3
|
|
248
|
+
unique 3
|
|
249
|
+
top a
|
|
250
|
+
freq 1
|
|
251
|
+
"""
|
|
252
|
+
# fixme add support for categorical columns once implemented
|
|
253
|
+
if percentiles is False:
|
|
254
|
+
percentiles = []
|
|
255
|
+
elif percentiles is None:
|
|
256
|
+
percentiles = [0.25, 0.5, 0.75]
|
|
257
|
+
else:
|
|
258
|
+
percentiles = list(percentiles)
|
|
259
|
+
if percentiles is not None:
|
|
260
|
+
for p in percentiles:
|
|
261
|
+
if p < 0 or p > 1:
|
|
262
|
+
raise ValueError(
|
|
263
|
+
"percentiles should all be in the interval [0, 1]. "
|
|
264
|
+
"Try [{0:.3f}] instead.".format(p / 100)
|
|
265
|
+
)
|
|
266
|
+
# median should always be included
|
|
267
|
+
if 0.5 not in percentiles:
|
|
268
|
+
percentiles.append(0.5)
|
|
269
|
+
percentiles = np.asarray(percentiles)
|
|
270
|
+
|
|
271
|
+
# sort and check for duplicates
|
|
272
|
+
unique_pcts = np.unique(percentiles)
|
|
273
|
+
if len(unique_pcts) < len(percentiles):
|
|
274
|
+
raise ValueError("percentiles cannot contain duplicates")
|
|
275
|
+
percentiles = unique_pcts.tolist()
|
|
276
|
+
|
|
277
|
+
op = DataFrameDescribe(percentiles=percentiles, include=include, exclude=exclude)
|
|
278
|
+
return op(df_or_series)
|