maxframe 2.4.0rc1__cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/__init__.py +33 -0
- maxframe/_utils.cp312-win32.pyd +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +561 -0
- maxframe/codegen/__init__.py +27 -0
- maxframe/codegen/core.py +597 -0
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +38 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +312 -0
- maxframe/codegen/spe/dataframe/indexing.py +333 -0
- maxframe/codegen/spe/dataframe/merge.py +110 -0
- maxframe/codegen/spe/dataframe/misc.py +264 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +183 -0
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +104 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +55 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +31 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +166 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +90 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +175 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +68 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
- maxframe/codegen/spe/utils.py +56 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/codegen/tests/test_codegen.py +67 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +630 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +114 -0
- maxframe/config/tests/test_validators.py +46 -0
- maxframe/config/validators.py +142 -0
- maxframe/conftest.py +261 -0
- maxframe/core/__init__.py +53 -0
- maxframe/core/accessor.py +45 -0
- maxframe/core/base.py +157 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +34 -0
- maxframe/core/entity/core.py +150 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/objects.py +115 -0
- maxframe/core/entity/output_types.py +101 -0
- maxframe/core/entity/tests/__init__.py +13 -0
- maxframe/core/entity/tests/test_objects.py +42 -0
- maxframe/core/entity/tileables.py +376 -0
- maxframe/core/entity/utils.py +39 -0
- maxframe/core/graph/__init__.py +22 -0
- maxframe/core/graph/builder/__init__.py +15 -0
- maxframe/core/graph/builder/base.py +90 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +37 -0
- maxframe/core/graph/core.cp312-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +478 -0
- maxframe/core/graph/entity.py +187 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +101 -0
- maxframe/core/operator/__init__.py +32 -0
- maxframe/core/operator/base.py +481 -0
- maxframe/core/operator/core.py +307 -0
- maxframe/core/operator/fetch.py +40 -0
- maxframe/core/operator/objects.py +43 -0
- maxframe/core/operator/shuffle.py +45 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/operator/utils.py +68 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +90 -0
- maxframe/dataframe/accessors/__init__.py +20 -0
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/accessors/datetime_/core.py +106 -0
- maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +45 -0
- maxframe/dataframe/accessors/dict_/accessor.py +39 -0
- maxframe/dataframe/accessors/dict_/contains.py +72 -0
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +140 -0
- maxframe/dataframe/accessors/dict_/length.py +64 -0
- maxframe/dataframe/accessors/dict_/remove.py +75 -0
- maxframe/dataframe/accessors/dict_/setitem.py +79 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
- maxframe/dataframe/accessors/list_/__init__.py +39 -0
- maxframe/dataframe/accessors/list_/accessor.py +39 -0
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +128 -0
- maxframe/dataframe/accessors/list_/length.py +64 -0
- maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
- maxframe/dataframe/accessors/plotting/__init__.py +40 -0
- maxframe/dataframe/accessors/plotting/core.py +78 -0
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
- maxframe/dataframe/accessors/string_/__init__.py +36 -0
- maxframe/dataframe/accessors/string_/accessor.py +215 -0
- maxframe/dataframe/accessors/string_/core.py +226 -0
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/accessors/struct_/__init__.py +39 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +373 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +361 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +416 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/equal.py +58 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +59 -0
- maxframe/dataframe/arithmetic/greater_equal.py +59 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +59 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +58 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/round.py +144 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/core.py +2386 -0
- maxframe/dataframe/datasource/__init__.py +33 -0
- maxframe/dataframe/datasource/core.py +112 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +512 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +58 -0
- maxframe/dataframe/datasource/from_records.py +191 -0
- maxframe/dataframe/datasource/from_tensor.py +503 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +534 -0
- maxframe/dataframe/datasource/read_odps_query.py +536 -0
- maxframe/dataframe/datasource/read_odps_table.py +295 -0
- maxframe/dataframe/datasource/read_parquet.py +278 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
- maxframe/dataframe/datastore/__init__.py +41 -0
- maxframe/dataframe/datastore/core.py +28 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
- maxframe/dataframe/datastore/to_csv.py +219 -0
- maxframe/dataframe/datastore/to_json.py +215 -0
- maxframe/dataframe/datastore/to_odps.py +285 -0
- maxframe/dataframe/datastore/to_parquet.py +121 -0
- maxframe/dataframe/extensions/__init__.py +70 -0
- maxframe/dataframe/extensions/accessor.py +35 -0
- maxframe/dataframe/extensions/apply_chunk.py +733 -0
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +133 -0
- maxframe/dataframe/extensions/flatmap.py +329 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +97 -0
- maxframe/dataframe/groupby/__init__.py +105 -0
- maxframe/dataframe/groupby/aggregation.py +485 -0
- maxframe/dataframe/groupby/apply.py +235 -0
- maxframe/dataframe/groupby/apply_chunk.py +407 -0
- maxframe/dataframe/groupby/core.py +342 -0
- maxframe/dataframe/groupby/cum.py +102 -0
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +149 -0
- maxframe/dataframe/groupby/getitem.py +105 -0
- maxframe/dataframe/groupby/head.py +115 -0
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
- maxframe/dataframe/groupby/transform.py +264 -0
- maxframe/dataframe/indexing/__init__.py +104 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +350 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/getitem.py +205 -0
- maxframe/dataframe/indexing/iat.py +82 -0
- maxframe/dataframe/indexing/iloc.py +711 -0
- maxframe/dataframe/indexing/insert.py +118 -0
- maxframe/dataframe/indexing/loc.py +694 -0
- maxframe/dataframe/indexing/reindex.py +541 -0
- maxframe/dataframe/indexing/rename.py +445 -0
- maxframe/dataframe/indexing/rename_axis.py +217 -0
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +427 -0
- maxframe/dataframe/indexing/sample.py +232 -0
- maxframe/dataframe/indexing/set_axis.py +197 -0
- maxframe/dataframe/indexing/set_index.py +128 -0
- maxframe/dataframe/indexing/setitem.py +133 -0
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +300 -0
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/initializer.py +298 -0
- maxframe/dataframe/merge/__init__.py +53 -0
- maxframe/dataframe/merge/append.py +120 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +500 -0
- maxframe/dataframe/merge/merge.py +806 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +390 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +145 -0
- maxframe/dataframe/misc/_duplicate.py +56 -0
- maxframe/dataframe/misc/apply.py +730 -0
- maxframe/dataframe/misc/astype.py +237 -0
- maxframe/dataframe/misc/case_when.py +145 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/cut.py +386 -0
- maxframe/dataframe/misc/describe.py +278 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +473 -0
- maxframe/dataframe/misc/drop_duplicates.py +251 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +730 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/factorize.py +160 -0
- maxframe/dataframe/misc/get_dummies.py +241 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +220 -0
- maxframe/dataframe/misc/map.py +360 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +68 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +259 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +649 -0
- maxframe/dataframe/misc/to_numeric.py +181 -0
- maxframe/dataframe/misc/transform.py +346 -0
- maxframe/dataframe/misc/transpose.py +148 -0
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +206 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +231 -0
- maxframe/dataframe/missing/dropna.py +294 -0
- maxframe/dataframe/missing/fillna.py +283 -0
- maxframe/dataframe/missing/replace.py +446 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +90 -0
- maxframe/dataframe/operators.py +231 -0
- maxframe/dataframe/reduction/__init__.py +129 -0
- maxframe/dataframe/reduction/aggregation.py +502 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +923 -0
- maxframe/dataframe/reduction/count.py +63 -0
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +111 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +63 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/mode.py +190 -0
- maxframe/dataframe/reduction/nunique.py +149 -0
- maxframe/dataframe/reduction/prod.py +81 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +73 -0
- maxframe/dataframe/reduction/skew.py +93 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +51 -0
- maxframe/dataframe/reduction/sum.py +81 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
- maxframe/dataframe/reduction/unique.py +153 -0
- maxframe/dataframe/reduction/var.py +76 -0
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/reshape/melt.py +169 -0
- maxframe/dataframe/reshape/pivot.py +233 -0
- maxframe/dataframe/reshape/pivot_table.py +275 -0
- maxframe/dataframe/reshape/stack.py +240 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +49 -0
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +37 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +308 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +85 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +284 -0
- maxframe/dataframe/statistics/quantile.py +338 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +60 -0
- maxframe/dataframe/tests/test_typing.py +119 -0
- maxframe/dataframe/tests/test_utils.py +169 -0
- maxframe/dataframe/tseries/__init__.py +32 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +299 -0
- maxframe/dataframe/typing_.py +196 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +53 -0
- maxframe/dataframe/utils.py +1728 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +100 -0
- maxframe/dataframe/window/core.py +82 -0
- maxframe/dataframe/window/ewm.py +247 -0
- maxframe/dataframe/window/expanding.py +151 -0
- maxframe/dataframe/window/rolling.py +389 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +60 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +37 -0
- maxframe/errors.py +52 -0
- maxframe/extension.py +131 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +156 -0
- maxframe/io/objects/tensor.py +133 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +85 -0
- maxframe/io/odpsio/__init__.py +24 -0
- maxframe/io/odpsio/arrow.py +161 -0
- maxframe/io/odpsio/schema.py +533 -0
- maxframe/io/odpsio/tableio.py +736 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/io/odpsio/tests/test_arrow.py +132 -0
- maxframe/io/odpsio/tests/test_schema.py +582 -0
- maxframe/io/odpsio/tests/test_tableio.py +205 -0
- maxframe/io/odpsio/tests/test_volumeio.py +75 -0
- maxframe/io/odpsio/volumeio.py +102 -0
- maxframe/learn/__init__.py +25 -0
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +216 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/__init__.py +17 -0
- maxframe/learn/contrib/llm/core.py +105 -0
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +16 -0
- maxframe/learn/contrib/llm/models/dashscope.py +114 -0
- maxframe/learn/contrib/llm/models/managed.py +119 -0
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/multi_modal.py +135 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +608 -0
- maxframe/learn/contrib/models.py +109 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +108 -0
- maxframe/learn/contrib/xgboost/__init__.py +33 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +119 -0
- maxframe/learn/contrib/xgboost/core.py +469 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
- maxframe/learn/contrib/xgboost/predict.py +133 -0
- maxframe/learn/contrib/xgboost/regressor.py +91 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +181 -0
- maxframe/learn/core.py +344 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +220 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +31 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1266 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +20 -0
- maxframe/learn/utils/_encode.py +312 -0
- maxframe/learn/utils/checks.py +160 -0
- maxframe/learn/utils/core.py +121 -0
- maxframe/learn/utils/extmath.py +246 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +13 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compat.py +185 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/dtypes_extension/__init__.py +30 -0
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +106 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/__init__.py +22 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +274 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
- maxframe/lib/filesystem/arrow.py +240 -0
- maxframe/lib/filesystem/base.py +327 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fshandler.py +136 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +120 -0
- maxframe/lib/filesystem/oss.py +283 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
- maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
- maxframe/lib/filesystem/tests/test_oss.py +220 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cp312-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +856 -0
- maxframe/lib/sparse/array.py +1616 -0
- maxframe/lib/sparse/core.py +90 -0
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +244 -0
- maxframe/lib/sparse/tests/__init__.py +13 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +148 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +177 -0
- maxframe/mixin.py +157 -0
- maxframe/opcodes.py +654 -0
- maxframe/protocol.py +611 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +212 -0
- maxframe/remote/run_script.py +124 -0
- maxframe/serialization/__init__.py +39 -0
- maxframe/serialization/arrow.py +107 -0
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp312-win32.pyd +0 -0
- maxframe/serialization/core.pxd +50 -0
- maxframe/serialization/core.pyi +66 -0
- maxframe/serialization/core.pyx +1282 -0
- maxframe/serialization/exception.py +90 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +110 -0
- maxframe/serialization/pandas.py +278 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +469 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +592 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +119 -0
- maxframe/serialization/serializables/tests/test_serializable.py +313 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +516 -0
- maxframe/session.py +1250 -0
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +325 -0
- maxframe/tensor/arithmetic/__init__.py +322 -0
- maxframe/tensor/arithmetic/abs.py +66 -0
- maxframe/tensor/arithmetic/absolute.py +66 -0
- maxframe/tensor/arithmetic/add.py +112 -0
- maxframe/tensor/arithmetic/angle.py +70 -0
- maxframe/tensor/arithmetic/arccos.py +101 -0
- maxframe/tensor/arithmetic/arccosh.py +89 -0
- maxframe/tensor/arithmetic/arcsin.py +92 -0
- maxframe/tensor/arithmetic/arcsinh.py +84 -0
- maxframe/tensor/arithmetic/arctan.py +104 -0
- maxframe/tensor/arithmetic/arctan2.py +126 -0
- maxframe/tensor/arithmetic/arctanh.py +84 -0
- maxframe/tensor/arithmetic/around.py +112 -0
- maxframe/tensor/arithmetic/bitand.py +93 -0
- maxframe/tensor/arithmetic/bitor.py +100 -0
- maxframe/tensor/arithmetic/bitxor.py +93 -0
- maxframe/tensor/arithmetic/cbrt.py +64 -0
- maxframe/tensor/arithmetic/ceil.py +69 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +72 -0
- maxframe/tensor/arithmetic/copysign.py +76 -0
- maxframe/tensor/arithmetic/core.py +546 -0
- maxframe/tensor/arithmetic/cos.py +83 -0
- maxframe/tensor/arithmetic/cosh.py +70 -0
- maxframe/tensor/arithmetic/deg2rad.py +70 -0
- maxframe/tensor/arithmetic/degrees.py +75 -0
- maxframe/tensor/arithmetic/divide.py +112 -0
- maxframe/tensor/arithmetic/equal.py +74 -0
- maxframe/tensor/arithmetic/exp.py +104 -0
- maxframe/tensor/arithmetic/exp2.py +65 -0
- maxframe/tensor/arithmetic/expm1.py +77 -0
- maxframe/tensor/arithmetic/fabs.py +72 -0
- maxframe/tensor/arithmetic/fix.py +67 -0
- maxframe/tensor/arithmetic/float_power.py +101 -0
- maxframe/tensor/arithmetic/floor.py +75 -0
- maxframe/tensor/arithmetic/floordiv.py +92 -0
- maxframe/tensor/arithmetic/fmax.py +103 -0
- maxframe/tensor/arithmetic/fmin.py +104 -0
- maxframe/tensor/arithmetic/fmod.py +97 -0
- maxframe/tensor/arithmetic/frexp.py +96 -0
- maxframe/tensor/arithmetic/greater.py +75 -0
- maxframe/tensor/arithmetic/greater_equal.py +67 -0
- maxframe/tensor/arithmetic/hypot.py +75 -0
- maxframe/tensor/arithmetic/i0.py +87 -0
- maxframe/tensor/arithmetic/imag.py +65 -0
- maxframe/tensor/arithmetic/invert.py +108 -0
- maxframe/tensor/arithmetic/isclose.py +114 -0
- maxframe/tensor/arithmetic/iscomplex.py +62 -0
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/isfinite.py +104 -0
- maxframe/tensor/arithmetic/isinf.py +101 -0
- maxframe/tensor/arithmetic/isnan.py +80 -0
- maxframe/tensor/arithmetic/isreal.py +61 -0
- maxframe/tensor/arithmetic/ldexp.py +97 -0
- maxframe/tensor/arithmetic/less.py +67 -0
- maxframe/tensor/arithmetic/less_equal.py +67 -0
- maxframe/tensor/arithmetic/log.py +90 -0
- maxframe/tensor/arithmetic/log10.py +83 -0
- maxframe/tensor/arithmetic/log1p.py +93 -0
- maxframe/tensor/arithmetic/log2.py +83 -0
- maxframe/tensor/arithmetic/logaddexp.py +78 -0
- maxframe/tensor/arithmetic/logaddexp2.py +76 -0
- maxframe/tensor/arithmetic/logical_and.py +79 -0
- maxframe/tensor/arithmetic/logical_not.py +72 -0
- maxframe/tensor/arithmetic/logical_or.py +80 -0
- maxframe/tensor/arithmetic/logical_xor.py +86 -0
- maxframe/tensor/arithmetic/lshift.py +80 -0
- maxframe/tensor/arithmetic/maximum.py +106 -0
- maxframe/tensor/arithmetic/minimum.py +106 -0
- maxframe/tensor/arithmetic/mod.py +102 -0
- maxframe/tensor/arithmetic/modf.py +87 -0
- maxframe/tensor/arithmetic/multiply.py +114 -0
- maxframe/tensor/arithmetic/nan_to_num.py +97 -0
- maxframe/tensor/arithmetic/negative.py +63 -0
- maxframe/tensor/arithmetic/nextafter.py +66 -0
- maxframe/tensor/arithmetic/not_equal.py +70 -0
- maxframe/tensor/arithmetic/positive.py +45 -0
- maxframe/tensor/arithmetic/power.py +104 -0
- maxframe/tensor/arithmetic/rad2deg.py +69 -0
- maxframe/tensor/arithmetic/radians.py +75 -0
- maxframe/tensor/arithmetic/real.py +68 -0
- maxframe/tensor/arithmetic/reciprocal.py +78 -0
- maxframe/tensor/arithmetic/rint.py +66 -0
- maxframe/tensor/arithmetic/rshift.py +79 -0
- maxframe/tensor/arithmetic/setimag.py +27 -0
- maxframe/tensor/arithmetic/setreal.py +27 -0
- maxframe/tensor/arithmetic/sign.py +79 -0
- maxframe/tensor/arithmetic/signbit.py +63 -0
- maxframe/tensor/arithmetic/sin.py +96 -0
- maxframe/tensor/arithmetic/sinc.py +100 -0
- maxframe/tensor/arithmetic/sinh.py +91 -0
- maxframe/tensor/arithmetic/spacing.py +70 -0
- maxframe/tensor/arithmetic/sqrt.py +79 -0
- maxframe/tensor/arithmetic/square.py +67 -0
- maxframe/tensor/arithmetic/subtract.py +83 -0
- maxframe/tensor/arithmetic/tan.py +86 -0
- maxframe/tensor/arithmetic/tanh.py +90 -0
- maxframe/tensor/arithmetic/tests/__init__.py +13 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
- maxframe/tensor/arithmetic/truediv.py +102 -0
- maxframe/tensor/arithmetic/trunc.py +70 -0
- maxframe/tensor/arithmetic/utils.py +91 -0
- maxframe/tensor/array_utils.py +164 -0
- maxframe/tensor/core.py +597 -0
- maxframe/tensor/datasource/__init__.py +40 -0
- maxframe/tensor/datasource/arange.py +154 -0
- maxframe/tensor/datasource/array.py +399 -0
- maxframe/tensor/datasource/core.py +114 -0
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +167 -0
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +68 -0
- maxframe/tensor/datasource/from_dense.py +37 -0
- maxframe/tensor/datasource/from_sparse.py +45 -0
- maxframe/tensor/datasource/full.py +184 -0
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +178 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +310 -0
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +192 -0
- maxframe/tensor/extensions/__init__.py +33 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +198 -0
- maxframe/tensor/indexing/compress.py +122 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +69 -0
- maxframe/tensor/indexing/fill_diagonal.py +180 -0
- maxframe/tensor/indexing/flatnonzero.py +58 -0
- maxframe/tensor/indexing/getitem.py +144 -0
- maxframe/tensor/indexing/nonzero.py +118 -0
- maxframe/tensor/indexing/setitem.py +142 -0
- maxframe/tensor/indexing/slice.py +32 -0
- maxframe/tensor/indexing/take.py +128 -0
- maxframe/tensor/indexing/tests/__init__.py +13 -0
- maxframe/tensor/indexing/tests/test_indexing.py +232 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +43 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/merge/__init__.py +21 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +103 -0
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +130 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +79 -0
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/misc/__init__.py +72 -0
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/astype.py +121 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/broadcast_to.py +89 -0
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +90 -0
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/misc/tests/test_misc.py +112 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/transpose.py +133 -0
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +227 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/misc/where.py +129 -0
- maxframe/tensor/operators.py +83 -0
- maxframe/tensor/random/__init__.py +166 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +135 -0
- maxframe/tensor/random/bytes.py +37 -0
- maxframe/tensor/random/chisquare.py +108 -0
- maxframe/tensor/random/choice.py +187 -0
- maxframe/tensor/random/core.py +249 -0
- maxframe/tensor/random/dirichlet.py +121 -0
- maxframe/tensor/random/exponential.py +92 -0
- maxframe/tensor/random/f.py +133 -0
- maxframe/tensor/random/gamma.py +126 -0
- maxframe/tensor/random/geometric.py +91 -0
- maxframe/tensor/random/gumbel.py +165 -0
- maxframe/tensor/random/hypergeometric.py +146 -0
- maxframe/tensor/random/laplace.py +131 -0
- maxframe/tensor/random/logistic.py +127 -0
- maxframe/tensor/random/lognormal.py +157 -0
- maxframe/tensor/random/logseries.py +120 -0
- maxframe/tensor/random/multinomial.py +131 -0
- maxframe/tensor/random/multivariate_normal.py +190 -0
- maxframe/tensor/random/negative_binomial.py +123 -0
- maxframe/tensor/random/noncentral_chisquare.py +130 -0
- maxframe/tensor/random/noncentral_f.py +124 -0
- maxframe/tensor/random/normal.py +141 -0
- maxframe/tensor/random/pareto.py +138 -0
- maxframe/tensor/random/permutation.py +107 -0
- maxframe/tensor/random/poisson.py +109 -0
- maxframe/tensor/random/power.py +140 -0
- maxframe/tensor/random/rand.py +80 -0
- maxframe/tensor/random/randint.py +119 -0
- maxframe/tensor/random/randn.py +94 -0
- maxframe/tensor/random/random_integers.py +121 -0
- maxframe/tensor/random/random_sample.py +84 -0
- maxframe/tensor/random/rayleigh.py +108 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +103 -0
- maxframe/tensor/random/standard_exponential.py +70 -0
- maxframe/tensor/random/standard_gamma.py +118 -0
- maxframe/tensor/random/standard_normal.py +72 -0
- maxframe/tensor/random/standard_t.py +133 -0
- maxframe/tensor/random/tests/__init__.py +13 -0
- maxframe/tensor/random/tests/test_random.py +165 -0
- maxframe/tensor/random/triangular.py +117 -0
- maxframe/tensor/random/uniform.py +129 -0
- maxframe/tensor/random/vonmises.py +129 -0
- maxframe/tensor/random/wald.py +112 -0
- maxframe/tensor/random/weibull.py +138 -0
- maxframe/tensor/random/zipf.py +120 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +64 -0
- maxframe/tensor/reduction/all.py +101 -0
- maxframe/tensor/reduction/allclose.py +86 -0
- maxframe/tensor/reduction/any.py +103 -0
- maxframe/tensor/reduction/argmax.py +101 -0
- maxframe/tensor/reduction/argmin.py +101 -0
- maxframe/tensor/reduction/array_equal.py +63 -0
- maxframe/tensor/reduction/core.py +166 -0
- maxframe/tensor/reduction/count_nonzero.py +80 -0
- maxframe/tensor/reduction/cumprod.py +95 -0
- maxframe/tensor/reduction/cumsum.py +99 -0
- maxframe/tensor/reduction/max.py +118 -0
- maxframe/tensor/reduction/mean.py +122 -0
- maxframe/tensor/reduction/min.py +118 -0
- maxframe/tensor/reduction/nanargmax.py +80 -0
- maxframe/tensor/reduction/nanargmin.py +74 -0
- maxframe/tensor/reduction/nancumprod.py +89 -0
- maxframe/tensor/reduction/nancumsum.py +92 -0
- maxframe/tensor/reduction/nanmax.py +109 -0
- maxframe/tensor/reduction/nanmean.py +105 -0
- maxframe/tensor/reduction/nanmin.py +109 -0
- maxframe/tensor/reduction/nanprod.py +92 -0
- maxframe/tensor/reduction/nanstd.py +124 -0
- maxframe/tensor/reduction/nansum.py +113 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +128 -0
- maxframe/tensor/reduction/std.py +132 -0
- maxframe/tensor/reduction/sum.py +123 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +189 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +15 -0
- maxframe/tensor/reshape/reshape.py +192 -0
- maxframe/tensor/reshape/tests/__init__.py +13 -0
- maxframe/tensor/reshape/tests/test_reshape.py +35 -0
- maxframe/tensor/sort/__init__.py +18 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +175 -0
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +99 -0
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +163 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +24 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/statistics/quantile.py +290 -0
- maxframe/tensor/ufunc/__init__.py +24 -0
- maxframe/tensor/ufunc/ufunc.py +198 -0
- maxframe/tensor/utils.py +719 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_protocol.py +178 -0
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +627 -0
- maxframe/tests/utils.py +245 -0
- maxframe/typing_.py +42 -0
- maxframe/udf.py +435 -0
- maxframe/utils.py +1774 -0
- maxframe-2.4.0rc1.dist-info/METADATA +109 -0
- maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
- maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
- maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +16 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +137 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +411 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +39 -0
- maxframe_client/session/graph.py +125 -0
- maxframe_client/session/odps.py +813 -0
- maxframe_client/session/task.py +329 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +115 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +215 -0
- maxframe_client/tests/test_session.py +409 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ._split import KFold, train_test_split
|
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
import numbers
|
|
17
|
+
from abc import ABCMeta, abstractmethod
|
|
18
|
+
from itertools import chain
|
|
19
|
+
from math import ceil, floor
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
from ... import tensor as mt
|
|
24
|
+
from ...core import ExecutableTuple
|
|
25
|
+
from ...tensor.utils import check_random_state
|
|
26
|
+
from ..utils import shuffle as shuffle_arrays
|
|
27
|
+
from ..utils.validation import _num_samples, indexable
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def train_test_split(*arrays, **options):
|
|
31
|
+
"""Split arrays or matrices into random train and test subsets
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
*arrays : sequence of indexables with same length / shape[0]
|
|
36
|
+
Allowed inputs are lists, numpy arrays, scipy-sparse
|
|
37
|
+
matrices or pandas dataframes.
|
|
38
|
+
|
|
39
|
+
test_size : float, int or None, optional (default=None)
|
|
40
|
+
If float, should be between 0.0 and 1.0 and represent the proportion
|
|
41
|
+
of the dataset to include in the test split. If int, represents the
|
|
42
|
+
absolute number of test samples. If None, the value is set to the
|
|
43
|
+
complement of the train size. If ``train_size`` is also None, it will
|
|
44
|
+
be set to 0.25.
|
|
45
|
+
|
|
46
|
+
train_size : float, int, or None, (default=None)
|
|
47
|
+
If float, should be between 0.0 and 1.0 and represent the
|
|
48
|
+
proportion of the dataset to include in the train split. If
|
|
49
|
+
int, represents the absolute number of train samples. If None,
|
|
50
|
+
the value is automatically set to the complement of the test size.
|
|
51
|
+
|
|
52
|
+
random_state : int, RandomState instance or None, optional (default=None)
|
|
53
|
+
If int, random_state is the seed used by the random number generator;
|
|
54
|
+
If RandomState instance, random_state is the random number generator;
|
|
55
|
+
If None, the random number generator is the RandomState instance used
|
|
56
|
+
by `np.random`.
|
|
57
|
+
|
|
58
|
+
shuffle : boolean, optional (default=True)
|
|
59
|
+
Whether or not to shuffle the data before splitting. If shuffle=False
|
|
60
|
+
then stratify must be None. CURRENTLY only shuffle=False is supported.
|
|
61
|
+
|
|
62
|
+
stratify : array-like or None (default=None)
|
|
63
|
+
If not None, data is split in a stratified fashion, using this as
|
|
64
|
+
the class labels.
|
|
65
|
+
|
|
66
|
+
Returns
|
|
67
|
+
-------
|
|
68
|
+
splitting : list, length=2 * len(arrays)
|
|
69
|
+
List containing train-test split of inputs.
|
|
70
|
+
|
|
71
|
+
Examples
|
|
72
|
+
--------
|
|
73
|
+
>>> import maxframe.tensor as mt
|
|
74
|
+
>>> from maxframe.learn.model_selection import train_test_split
|
|
75
|
+
>>> X, y = mt.arange(10).reshape((5, 2)), range(5)
|
|
76
|
+
>>> X.execute()
|
|
77
|
+
array([[0, 1],
|
|
78
|
+
[2, 3],
|
|
79
|
+
[4, 5],
|
|
80
|
+
[6, 7],
|
|
81
|
+
[8, 9]])
|
|
82
|
+
>>> list(y)
|
|
83
|
+
[0, 1, 2, 3, 4]
|
|
84
|
+
|
|
85
|
+
>>> X_train, X_test, y_train, y_test = train_test_split(
|
|
86
|
+
... X, y, test_size=0.33, random_state=42)
|
|
87
|
+
...
|
|
88
|
+
>>> X_train.execute()
|
|
89
|
+
array([[8, 9],
|
|
90
|
+
[0, 1],
|
|
91
|
+
[4, 5]])
|
|
92
|
+
>>> y_train.execute()
|
|
93
|
+
array([4, 0, 2])
|
|
94
|
+
>>> X_test.execute()
|
|
95
|
+
array([[2, 3],
|
|
96
|
+
[6, 7]])
|
|
97
|
+
>>> y_test.execute()
|
|
98
|
+
array([1, 3])
|
|
99
|
+
|
|
100
|
+
>>> train_test_split(y, shuffle=False)
|
|
101
|
+
[array([0, 1, 2]), array([3, 4])]
|
|
102
|
+
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
n_arrays = len(arrays)
|
|
106
|
+
if n_arrays == 0:
|
|
107
|
+
raise ValueError("At least one array required as input")
|
|
108
|
+
test_size = options.pop("test_size", None)
|
|
109
|
+
train_size = options.pop("train_size", None)
|
|
110
|
+
random_state = options.pop("random_state", None) # noqa: F841
|
|
111
|
+
stratify = options.pop("stratify", None)
|
|
112
|
+
shuffle = options.pop("shuffle", True)
|
|
113
|
+
|
|
114
|
+
if options:
|
|
115
|
+
raise TypeError(f"Invalid parameters passed: {options}")
|
|
116
|
+
|
|
117
|
+
arrays = indexable(*arrays)
|
|
118
|
+
|
|
119
|
+
n_samples = _num_samples(arrays[0])
|
|
120
|
+
n_train, n_test = _validate_shuffle_split(
|
|
121
|
+
n_samples, test_size, train_size, default_test_size=0.25
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
if shuffle is False:
|
|
125
|
+
if stratify is not None: # pragma: no cover
|
|
126
|
+
raise ValueError(
|
|
127
|
+
"Stratified train/test split is not implemented for shuffle=False"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
iterables = ((a[:n_train], a[n_train : n_train + n_test]) for a in arrays)
|
|
131
|
+
else:
|
|
132
|
+
shuffled_arrays = shuffle_arrays(*arrays, random_state=random_state)
|
|
133
|
+
if not isinstance(shuffled_arrays, tuple):
|
|
134
|
+
shuffled_arrays = (shuffled_arrays,)
|
|
135
|
+
iterables = (
|
|
136
|
+
(a[:n_train], a[n_train : n_train + n_test]) for a in shuffled_arrays
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
return list(ExecutableTuple(chain.from_iterable(iterables)))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _validate_shuffle_split(n_samples, test_size, train_size, default_test_size=None):
|
|
143
|
+
"""
|
|
144
|
+
Validation helper to check if the test/test sizes are meaningful wrt to the
|
|
145
|
+
size of the data (n_samples)
|
|
146
|
+
"""
|
|
147
|
+
if test_size is None and train_size is None:
|
|
148
|
+
test_size = default_test_size
|
|
149
|
+
|
|
150
|
+
test_size_type = np.asarray(test_size).dtype.kind
|
|
151
|
+
train_size_type = np.asarray(train_size).dtype.kind
|
|
152
|
+
|
|
153
|
+
if (
|
|
154
|
+
test_size_type == "i"
|
|
155
|
+
and (test_size >= n_samples or test_size <= 0)
|
|
156
|
+
or test_size_type == "f"
|
|
157
|
+
and (test_size <= 0 or test_size >= 1)
|
|
158
|
+
):
|
|
159
|
+
raise ValueError(
|
|
160
|
+
f"test_size={test_size} should be either positive and smaller"
|
|
161
|
+
f" than the number of samples {n_samples} or a float in the "
|
|
162
|
+
"(0, 1) range"
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
if (
|
|
166
|
+
train_size_type == "i"
|
|
167
|
+
and (train_size >= n_samples or train_size <= 0)
|
|
168
|
+
or train_size_type == "f"
|
|
169
|
+
and (train_size <= 0 or train_size >= 1)
|
|
170
|
+
):
|
|
171
|
+
raise ValueError(
|
|
172
|
+
f"train_size={train_size} should be either positive and smaller"
|
|
173
|
+
f" than the number of samples {n_samples} or a float in the "
|
|
174
|
+
"(0, 1) range"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
if train_size is not None and train_size_type not in ("i", "f"): # pragma: no cover
|
|
178
|
+
raise ValueError(f"Invalid value for train_size: {train_size}")
|
|
179
|
+
if test_size is not None and test_size_type not in ("i", "f"):
|
|
180
|
+
raise ValueError(f"Invalid value for test_size: {test_size}")
|
|
181
|
+
|
|
182
|
+
if train_size_type == "f" and test_size_type == "f" and train_size + test_size > 1:
|
|
183
|
+
raise ValueError(
|
|
184
|
+
f"The sum of test_size and train_size = {train_size + test_size}, "
|
|
185
|
+
"should be in the (0, 1) range. Reduce test_size and/or train_size."
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
if test_size_type == "f":
|
|
189
|
+
n_test = ceil(test_size * n_samples)
|
|
190
|
+
elif test_size_type == "i":
|
|
191
|
+
n_test = float(test_size)
|
|
192
|
+
|
|
193
|
+
if train_size_type == "f":
|
|
194
|
+
n_train = floor(train_size * n_samples)
|
|
195
|
+
elif train_size_type == "i": # pragma: no cover
|
|
196
|
+
n_train = float(train_size)
|
|
197
|
+
|
|
198
|
+
if train_size is None:
|
|
199
|
+
n_train = n_samples - n_test
|
|
200
|
+
elif test_size is None:
|
|
201
|
+
n_test = n_samples - n_train
|
|
202
|
+
|
|
203
|
+
if n_train + n_test > n_samples: # pragma: no cover
|
|
204
|
+
raise ValueError(
|
|
205
|
+
f"The sum of train_size and test_size = {n_train + n_test}, "
|
|
206
|
+
f"should be smaller than the number of samples {n_samples}. "
|
|
207
|
+
"Reduce test_size and/or train_size."
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
n_train, n_test = int(n_train), int(n_test)
|
|
211
|
+
|
|
212
|
+
if n_train == 0: # pragma: no cover
|
|
213
|
+
raise ValueError(
|
|
214
|
+
f"With n_samples={n_samples}, test_size={test_size} and "
|
|
215
|
+
f"train_size={train_size}, the resulting train set will "
|
|
216
|
+
f"be empty. Adjust any of the aforementioned parameters."
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
return n_train, n_test
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class BaseCrossValidator(metaclass=ABCMeta):
|
|
223
|
+
"""Base class for all cross-validators
|
|
224
|
+
|
|
225
|
+
Implementations must define `_iter_test_masks` or `_iter_test_indices`.
|
|
226
|
+
"""
|
|
227
|
+
|
|
228
|
+
def split(self, X, y=None, groups=None): # pragma: no cover
|
|
229
|
+
"""Generate indices to split data into training and test set.
|
|
230
|
+
|
|
231
|
+
Parameters
|
|
232
|
+
----------
|
|
233
|
+
X : array-like of shape (n_samples, n_features)
|
|
234
|
+
Training data, where n_samples is the number of samples
|
|
235
|
+
and n_features is the number of features.
|
|
236
|
+
|
|
237
|
+
y : array-like of shape (n_samples,)
|
|
238
|
+
The target variable for supervised learning problems.
|
|
239
|
+
|
|
240
|
+
groups : array-like of shape (n_samples,), default=None
|
|
241
|
+
Group labels for the samples used while splitting the dataset into
|
|
242
|
+
train/test set.
|
|
243
|
+
|
|
244
|
+
Yields
|
|
245
|
+
------
|
|
246
|
+
train : ndarray
|
|
247
|
+
The training set indices for that split.
|
|
248
|
+
|
|
249
|
+
test : ndarray
|
|
250
|
+
The testing set indices for that split.
|
|
251
|
+
"""
|
|
252
|
+
X, y, groups = indexable(X, y, groups)
|
|
253
|
+
indices = mt.arange(_num_samples(X))
|
|
254
|
+
for test_index in self._iter_test_masks(X, y, groups):
|
|
255
|
+
train_index = indices[mt.logical_not(test_index)]
|
|
256
|
+
test_index = indices[test_index]
|
|
257
|
+
yield train_index, test_index
|
|
258
|
+
|
|
259
|
+
# Since subclasses must implement either _iter_test_masks or
|
|
260
|
+
# _iter_test_indices, neither can be abstract.
|
|
261
|
+
def _iter_test_masks(self, X=None, y=None, groups=None): # pragma: no cover
|
|
262
|
+
"""Generates boolean masks corresponding to test sets.
|
|
263
|
+
|
|
264
|
+
By default, delegates to _iter_test_indices(X, y, groups)
|
|
265
|
+
"""
|
|
266
|
+
for test_index in self._iter_test_indices(X, y, groups):
|
|
267
|
+
test_mask = mt.zeros(_num_samples(X), dtype=bool)
|
|
268
|
+
test_mask[test_index] = True
|
|
269
|
+
yield test_mask
|
|
270
|
+
|
|
271
|
+
def _iter_test_indices(self, X=None, y=None, groups=None): # pragma: no cover
|
|
272
|
+
"""Generates integer indices corresponding to test sets."""
|
|
273
|
+
raise NotImplementedError
|
|
274
|
+
|
|
275
|
+
@abstractmethod
|
|
276
|
+
def get_n_splits(self, X=None, y=None, groups=None):
|
|
277
|
+
"""Returns the number of splitting iterations in the cross-validator"""
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class _BaseKFold(BaseCrossValidator, metaclass=ABCMeta):
|
|
281
|
+
"""Base class for KFold, GroupKFold, and StratifiedKFold"""
|
|
282
|
+
|
|
283
|
+
@abstractmethod
|
|
284
|
+
def __init__(self, n_splits, *, shuffle, random_state):
|
|
285
|
+
if not isinstance(n_splits, numbers.Integral):
|
|
286
|
+
raise ValueError(
|
|
287
|
+
"The number of folds must be of Integral type. "
|
|
288
|
+
"%s of type %s was passed." % (n_splits, type(n_splits))
|
|
289
|
+
)
|
|
290
|
+
n_splits = int(n_splits)
|
|
291
|
+
|
|
292
|
+
if n_splits <= 1:
|
|
293
|
+
raise ValueError(
|
|
294
|
+
"k-fold cross-validation requires at least one"
|
|
295
|
+
" train/test split by setting n_splits=2 or more,"
|
|
296
|
+
" got n_splits={0}.".format(n_splits)
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
if not isinstance(shuffle, bool):
|
|
300
|
+
raise TypeError("shuffle must be True or False; got {0}".format(shuffle))
|
|
301
|
+
|
|
302
|
+
if not shuffle and random_state is not None: # None is the default
|
|
303
|
+
raise ValueError(
|
|
304
|
+
"Setting a random_state has no effect since shuffle is "
|
|
305
|
+
"False. You should leave "
|
|
306
|
+
"random_state to its default (None), or set shuffle=True.",
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
self.n_splits = n_splits
|
|
310
|
+
self.shuffle = shuffle
|
|
311
|
+
self.random_state = random_state
|
|
312
|
+
|
|
313
|
+
def get_n_splits(self, X=None, y=None, groups=None):
|
|
314
|
+
"""Returns the number of splitting iterations in the cross-validator
|
|
315
|
+
|
|
316
|
+
Parameters
|
|
317
|
+
----------
|
|
318
|
+
X : object
|
|
319
|
+
Always ignored, exists for compatibility.
|
|
320
|
+
|
|
321
|
+
y : object
|
|
322
|
+
Always ignored, exists for compatibility.
|
|
323
|
+
|
|
324
|
+
groups : object
|
|
325
|
+
Always ignored, exists for compatibility.
|
|
326
|
+
|
|
327
|
+
Returns
|
|
328
|
+
-------
|
|
329
|
+
n_splits : int
|
|
330
|
+
Returns the number of splitting iterations in the cross-validator.
|
|
331
|
+
"""
|
|
332
|
+
return self.n_splits
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class KFold(_BaseKFold):
|
|
336
|
+
"""K-Folds cross-validator
|
|
337
|
+
|
|
338
|
+
Provides train/test indices to split data in train/test sets. Split
|
|
339
|
+
dataset into k consecutive folds (without shuffling by default).
|
|
340
|
+
|
|
341
|
+
Each fold is then used once as a validation while the k - 1 remaining
|
|
342
|
+
folds form the training set.
|
|
343
|
+
|
|
344
|
+
Parameters
|
|
345
|
+
----------
|
|
346
|
+
n_splits : int, default=5
|
|
347
|
+
Number of folds. Must be at least 2.
|
|
348
|
+
|
|
349
|
+
.. versionchanged:: 0.22
|
|
350
|
+
``n_splits`` default value changed from 3 to 5.
|
|
351
|
+
|
|
352
|
+
shuffle : bool, default=False
|
|
353
|
+
Whether to shuffle the data before splitting into batches.
|
|
354
|
+
Note that the samples within each split will not be shuffled.
|
|
355
|
+
|
|
356
|
+
random_state : int, RandomState instance or None, default=None
|
|
357
|
+
When `shuffle` is True, `random_state` affects the ordering of the
|
|
358
|
+
indices, which controls the randomness of each fold. Otherwise, this
|
|
359
|
+
parameter has no effect.
|
|
360
|
+
Pass an int for reproducible output across multiple function calls.
|
|
361
|
+
See :term:`Glossary <random_state>`.
|
|
362
|
+
|
|
363
|
+
Examples
|
|
364
|
+
--------
|
|
365
|
+
>>> import maxframe.tensor as mt
|
|
366
|
+
>>> from maxframe.learn.model_selection import KFold
|
|
367
|
+
>>> X = mt.array([[1, 2], [3, 4], [1, 2], [3, 4]])
|
|
368
|
+
>>> y = mt.array([1, 2, 3, 4])
|
|
369
|
+
>>> kf = KFold(n_splits=2)
|
|
370
|
+
>>> kf.get_n_splits(X)
|
|
371
|
+
2
|
|
372
|
+
>>> print(kf)
|
|
373
|
+
KFold(n_splits=2, random_state=None, shuffle=False)
|
|
374
|
+
>>> for train_index, test_index in kf.split(X):
|
|
375
|
+
... print("TRAIN:", train_index, "TEST:", test_index)
|
|
376
|
+
... X_train, X_test = X[train_index], X[test_index]
|
|
377
|
+
... y_train, y_test = y[train_index], y[test_index]
|
|
378
|
+
TRAIN: [2 3] TEST: [0 1]
|
|
379
|
+
TRAIN: [0 1] TEST: [2 3]
|
|
380
|
+
|
|
381
|
+
Notes
|
|
382
|
+
-----
|
|
383
|
+
The first ``n_samples % n_splits`` folds have size
|
|
384
|
+
``n_samples // n_splits + 1``, other folds have size
|
|
385
|
+
``n_samples // n_splits``, where ``n_samples`` is the number of samples.
|
|
386
|
+
|
|
387
|
+
Randomized CV splitters may return different results for each call of
|
|
388
|
+
split. You can make the results identical by setting `random_state`
|
|
389
|
+
to an integer.
|
|
390
|
+
|
|
391
|
+
See Also
|
|
392
|
+
--------
|
|
393
|
+
StratifiedKFold : Takes group information into account to avoid building
|
|
394
|
+
folds with imbalanced class distributions (for binary or multiclass
|
|
395
|
+
classification tasks).
|
|
396
|
+
|
|
397
|
+
GroupKFold : K-fold iterator variant with non-overlapping groups.
|
|
398
|
+
|
|
399
|
+
RepeatedKFold : Repeats K-Fold n times.
|
|
400
|
+
"""
|
|
401
|
+
|
|
402
|
+
def __init__(self, n_splits=5, *, shuffle=False, random_state=None):
|
|
403
|
+
super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
|
|
404
|
+
|
|
405
|
+
def split(self, X, y=None, groups=None):
|
|
406
|
+
"""Generate indices to split data into training and test set.
|
|
407
|
+
|
|
408
|
+
Parameters
|
|
409
|
+
----------
|
|
410
|
+
X : array-like of shape (n_samples, n_features)
|
|
411
|
+
Training data, where n_samples is the number of samples
|
|
412
|
+
and n_features is the number of features.
|
|
413
|
+
|
|
414
|
+
y : array-like of shape (n_samples,), default=None
|
|
415
|
+
The target variable for supervised learning problems.
|
|
416
|
+
|
|
417
|
+
groups : array-like of shape (n_samples,), default=None
|
|
418
|
+
Group labels for the samples used while splitting the dataset into
|
|
419
|
+
train/test set.
|
|
420
|
+
|
|
421
|
+
Yields
|
|
422
|
+
------
|
|
423
|
+
train : ndarray
|
|
424
|
+
The training set indices for that split.
|
|
425
|
+
|
|
426
|
+
test : ndarray
|
|
427
|
+
The testing set indices for that split.
|
|
428
|
+
"""
|
|
429
|
+
X, y, groups = indexable(X, y, groups)
|
|
430
|
+
n_samples = _num_samples(X)
|
|
431
|
+
if self.n_splits > n_samples:
|
|
432
|
+
raise ValueError(
|
|
433
|
+
(
|
|
434
|
+
"Cannot have number of splits n_splits={0} greater"
|
|
435
|
+
" than the number of samples: n_samples={1}."
|
|
436
|
+
).format(self.n_splits, n_samples)
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
indices = mt.arange(n_samples)
|
|
440
|
+
if self.shuffle:
|
|
441
|
+
check_random_state(self.random_state).shuffle(indices)
|
|
442
|
+
|
|
443
|
+
n_splits = self.n_splits
|
|
444
|
+
fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
|
|
445
|
+
fold_sizes[: n_samples % n_splits] += 1
|
|
446
|
+
current = 0
|
|
447
|
+
for fold_size in fold_sizes:
|
|
448
|
+
start, stop = current, current + fold_size
|
|
449
|
+
train_index = mt.concatenate([indices[:start], indices[stop:]])
|
|
450
|
+
yield train_index, indices[start:stop]
|
|
451
|
+
current = stop
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import itertools
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import pytest
|
|
20
|
+
|
|
21
|
+
from .... import dataframe as md
|
|
22
|
+
from .... import tensor as mt
|
|
23
|
+
from ....dataframe.core import DATAFRAME_TYPE
|
|
24
|
+
from .._split import KFold, train_test_split
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_train_test_split_errors():
|
|
28
|
+
pytest.raises(ValueError, train_test_split)
|
|
29
|
+
|
|
30
|
+
pytest.raises(ValueError, train_test_split, range(3), train_size=1.1)
|
|
31
|
+
|
|
32
|
+
pytest.raises(ValueError, train_test_split, range(3), test_size=0.6, train_size=0.6)
|
|
33
|
+
pytest.raises(
|
|
34
|
+
ValueError,
|
|
35
|
+
train_test_split,
|
|
36
|
+
range(3),
|
|
37
|
+
test_size=np.float32(0.6),
|
|
38
|
+
train_size=np.float32(0.6),
|
|
39
|
+
)
|
|
40
|
+
pytest.raises(ValueError, train_test_split, range(3), test_size="wrong_type")
|
|
41
|
+
pytest.raises(ValueError, train_test_split, range(3), test_size=2, train_size=4)
|
|
42
|
+
pytest.raises(TypeError, train_test_split, range(3), some_argument=1.1)
|
|
43
|
+
pytest.raises(ValueError, train_test_split, range(3), range(42))
|
|
44
|
+
pytest.raises(ValueError, train_test_split, range(10), shuffle=False, stratify=True)
|
|
45
|
+
|
|
46
|
+
with pytest.raises(
|
|
47
|
+
ValueError,
|
|
48
|
+
match=r"train_size=11 should be either positive and "
|
|
49
|
+
r"smaller than the number of samples 10 or a "
|
|
50
|
+
r"float in the \(0, 1\) range",
|
|
51
|
+
):
|
|
52
|
+
train_test_split(range(10), train_size=11, test_size=1)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_train_test_split_invalid_sizes1():
|
|
56
|
+
for train_size, test_size in [
|
|
57
|
+
(1.2, 0.8),
|
|
58
|
+
(1.0, 0.8),
|
|
59
|
+
(0.0, 0.8),
|
|
60
|
+
(-0.2, 0.8),
|
|
61
|
+
(0.8, 1.2),
|
|
62
|
+
(0.8, 1.0),
|
|
63
|
+
(0.8, 0.0),
|
|
64
|
+
(0.8, -0.2),
|
|
65
|
+
]:
|
|
66
|
+
with pytest.raises(ValueError, match=r"should be .* in the \(0, 1\) range"):
|
|
67
|
+
train_test_split(range(10), train_size=train_size, test_size=test_size)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_train_test_split_invalid_sizes2():
|
|
71
|
+
for train_size, test_size in [
|
|
72
|
+
(-10, 0.8),
|
|
73
|
+
(0, 0.8),
|
|
74
|
+
(11, 0.8),
|
|
75
|
+
(0.8, -10),
|
|
76
|
+
(0.8, 0),
|
|
77
|
+
(0.8, 11),
|
|
78
|
+
]:
|
|
79
|
+
with pytest.raises(ValueError, match=r"should be .* in the \(0, 1\) range"):
|
|
80
|
+
train_test_split(range(10), train_size=train_size, test_size=test_size)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_train_test_split():
|
|
84
|
+
# allow nd-arrays
|
|
85
|
+
X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
|
|
86
|
+
y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
|
|
87
|
+
split = train_test_split(X_4d, y_3d)
|
|
88
|
+
assert split[0].shape == (7, 5, 3, 2)
|
|
89
|
+
assert split[1].shape == (3, 5, 3, 2)
|
|
90
|
+
assert split[2].shape == (7, 7, 11)
|
|
91
|
+
assert split[3].shape == (3, 7, 11)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_train_test_split_dataframe():
|
|
95
|
+
X = np.ones(10)
|
|
96
|
+
types = [pd.DataFrame, md.DataFrame]
|
|
97
|
+
for InputFeatureType in types:
|
|
98
|
+
# X dataframe
|
|
99
|
+
X_df = InputFeatureType(X)
|
|
100
|
+
X_train, X_test = train_test_split(X_df)
|
|
101
|
+
assert isinstance(X_train, DATAFRAME_TYPE)
|
|
102
|
+
assert isinstance(X_test, DATAFRAME_TYPE)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_mixed_input_type_train_test_split():
|
|
106
|
+
rs = np.random.RandomState(0)
|
|
107
|
+
df_raw = pd.DataFrame(rs.rand(10, 4))
|
|
108
|
+
df = md.DataFrame(df_raw, chunk_size=5)
|
|
109
|
+
X, y = df.iloc[:, :-1], df.iloc[:, -1]
|
|
110
|
+
|
|
111
|
+
for x_to_tensor, y_to_tensor in itertools.product(range(1), range(1)):
|
|
112
|
+
x = X
|
|
113
|
+
if x_to_tensor:
|
|
114
|
+
x = mt.tensor(x)
|
|
115
|
+
yy = y
|
|
116
|
+
if y_to_tensor:
|
|
117
|
+
yy = mt.tensor(yy)
|
|
118
|
+
|
|
119
|
+
x_train, x_test, y_train, y_test = train_test_split(x, y)
|
|
120
|
+
assert isinstance(x_train, type(x))
|
|
121
|
+
assert isinstance(x_test, type(x))
|
|
122
|
+
assert isinstance(y_train, type(yy))
|
|
123
|
+
assert isinstance(y_test, type(yy))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_kfold_valueerrors():
|
|
127
|
+
X1 = np.array([[1, 2], [3, 4], [5, 6]])
|
|
128
|
+
# Check that errors are raised if there is not enough samples
|
|
129
|
+
with pytest.raises(ValueError):
|
|
130
|
+
next(KFold(4).split(X1))
|
|
131
|
+
|
|
132
|
+
# Error when number of folds is <= 1
|
|
133
|
+
with pytest.raises(ValueError):
|
|
134
|
+
KFold(0)
|
|
135
|
+
with pytest.raises(ValueError):
|
|
136
|
+
KFold(1)
|
|
137
|
+
|
|
138
|
+
# When n_splits is not integer:
|
|
139
|
+
with pytest.raises(ValueError):
|
|
140
|
+
KFold(1.5)
|
|
141
|
+
with pytest.raises(ValueError):
|
|
142
|
+
KFold(2.0)
|
|
143
|
+
|
|
144
|
+
# When shuffle is not a bool:
|
|
145
|
+
with pytest.raises(TypeError):
|
|
146
|
+
KFold(n_splits=4, shuffle=None)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_kfold_balance():
|
|
150
|
+
# Check that KFold returns folds with balanced sizes
|
|
151
|
+
for i in range(11, 17):
|
|
152
|
+
kf = KFold(5).split(X=np.ones(i))
|
|
153
|
+
sizes = [len(test) for _, test in kf]
|
|
154
|
+
|
|
155
|
+
assert (np.max(sizes) - np.min(sizes)) <= 1
|
|
156
|
+
assert np.sum(sizes) == i
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ._data import MinMaxScaler, StandardScaler, minmax_scale, normalize, scale
|
|
16
|
+
from ._label import LabelBinarizer, LabelEncoder, label_binarize
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .min_max_scaler import MinMaxScaler, minmax_scale
|
|
16
|
+
from .normalize import normalize
|
|
17
|
+
from .standard_scaler import StandardScaler, scale
|