PyPI - maxframe - Versions diffs - 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl - Mend

maxframe 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (644)

maxframe/_utils.cpython-310-darwin.so +0 -0
maxframe/_utils.pyi +21 -0
maxframe/_utils.pyx +4 -3
maxframe/codegen/__init__.py +27 -0
maxframe/{codegen.py → codegen/core.py} +49 -43
maxframe/codegen/spe/__init__.py +16 -0
maxframe/codegen/spe/core.py +307 -0
maxframe/codegen/spe/dataframe/__init__.py +37 -0
maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
maxframe/codegen/spe/dataframe/datasource.py +181 -0
maxframe/codegen/spe/dataframe/datastore.py +204 -0
maxframe/codegen/spe/dataframe/extensions.py +63 -0
maxframe/codegen/spe/dataframe/fetch.py +26 -0
maxframe/codegen/spe/dataframe/groupby.py +224 -0
maxframe/codegen/spe/dataframe/indexing.py +238 -0
maxframe/codegen/spe/dataframe/merge.py +73 -0
maxframe/codegen/spe/dataframe/misc.py +286 -0
maxframe/codegen/spe/dataframe/missing.py +64 -0
maxframe/codegen/spe/dataframe/reduction.py +160 -0
maxframe/codegen/spe/dataframe/sort.py +83 -0
maxframe/codegen/spe/dataframe/statistics.py +46 -0
maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
maxframe/codegen/spe/dataframe/tseries.py +46 -0
maxframe/codegen/spe/dataframe/udf.py +62 -0
maxframe/codegen/spe/dataframe/value_counts.py +31 -0
maxframe/codegen/spe/dataframe/window.py +65 -0
maxframe/codegen/spe/learn/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
maxframe/codegen/spe/learn/contrib/models.py +41 -0
maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
maxframe/codegen/spe/learn/utils/__init__.py +15 -0
maxframe/codegen/spe/learn/utils/checks.py +55 -0
maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
maxframe/codegen/spe/learn/utils/validation.py +35 -0
maxframe/codegen/spe/objects.py +26 -0
maxframe/codegen/spe/remote.py +29 -0
maxframe/codegen/spe/tensor/__init__.py +28 -0
maxframe/codegen/spe/tensor/arithmetic.py +95 -0
maxframe/codegen/spe/tensor/core.py +41 -0
maxframe/codegen/spe/tensor/datasource.py +165 -0
maxframe/codegen/spe/tensor/extensions.py +35 -0
maxframe/codegen/spe/tensor/fetch.py +26 -0
maxframe/codegen/spe/tensor/indexing.py +63 -0
maxframe/codegen/spe/tensor/linalg.py +63 -0
maxframe/codegen/spe/tensor/merge.py +31 -0
maxframe/codegen/spe/tensor/misc.py +121 -0
maxframe/codegen/spe/tensor/random.py +29 -0
maxframe/codegen/spe/tensor/reduction.py +39 -0
maxframe/codegen/spe/tensor/reshape.py +26 -0
maxframe/codegen/spe/tensor/sort.py +42 -0
maxframe/codegen/spe/tensor/special.py +35 -0
maxframe/codegen/spe/tensor/statistics.py +24 -0
maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
maxframe/codegen/spe/tests/__init__.py +13 -0
maxframe/codegen/spe/tests/test_remote.py +29 -0
maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
maxframe/codegen/spe/utils.py +54 -0
maxframe/codegen/tests/__init__.py +13 -0
maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
maxframe/config/__init__.py +1 -1
maxframe/config/config.py +50 -23
maxframe/config/tests/test_config.py +4 -12
maxframe/config/validators.py +5 -0
maxframe/conftest.py +38 -10
maxframe/core/__init__.py +1 -0
maxframe/core/context.py +110 -0
maxframe/core/entity/__init__.py +1 -0
maxframe/core/entity/core.py +0 -7
maxframe/core/entity/objects.py +19 -5
maxframe/core/entity/output_types.py +11 -0
maxframe/core/entity/tests/test_objects.py +11 -12
maxframe/core/entity/tileables.py +3 -1
maxframe/core/entity/utils.py +15 -0
maxframe/core/graph/__init__.py +6 -1
maxframe/core/graph/builder/base.py +5 -1
maxframe/core/graph/core.cpython-310-darwin.so +0 -0
maxframe/core/graph/core.pyx +17 -6
maxframe/core/graph/entity.py +18 -6
maxframe/core/operator/__init__.py +8 -3
maxframe/core/operator/base.py +35 -12
maxframe/core/operator/core.py +37 -14
maxframe/core/operator/fetch.py +5 -18
maxframe/core/operator/objects.py +0 -20
maxframe/core/operator/shuffle.py +6 -72
maxframe/dataframe/__init__.py +1 -0
maxframe/dataframe/accessors/datetime_/core.py +7 -4
maxframe/dataframe/accessors/string_/core.py +9 -6
maxframe/dataframe/arithmetic/core.py +31 -20
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/dataframe/core.py +98 -91
maxframe/dataframe/datasource/core.py +8 -1
maxframe/dataframe/datasource/date_range.py +8 -0
maxframe/dataframe/datasource/from_index.py +9 -5
maxframe/dataframe/datasource/from_records.py +9 -2
maxframe/dataframe/datasource/from_tensor.py +32 -21
maxframe/dataframe/datasource/read_csv.py +8 -2
maxframe/dataframe/datasource/read_odps_query.py +109 -19
maxframe/dataframe/datasource/read_odps_table.py +20 -5
maxframe/dataframe/datasource/read_parquet.py +8 -3
maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
maxframe/dataframe/datastore/to_csv.py +7 -3
maxframe/dataframe/datastore/to_odps.py +42 -6
maxframe/dataframe/extensions/__init__.py +6 -1
maxframe/dataframe/extensions/apply_chunk.py +96 -136
maxframe/dataframe/extensions/flatjson.py +3 -2
maxframe/dataframe/extensions/flatmap.py +15 -7
maxframe/dataframe/fetch/core.py +12 -1
maxframe/dataframe/groupby/__init__.py +7 -0
maxframe/dataframe/groupby/aggregation.py +62 -9
maxframe/dataframe/groupby/apply.py +50 -74
maxframe/dataframe/groupby/apply_chunk.py +393 -0
maxframe/dataframe/groupby/core.py +80 -17
maxframe/dataframe/groupby/extensions.py +26 -0
maxframe/dataframe/groupby/fill.py +9 -4
maxframe/dataframe/groupby/sample.py +7 -7
maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
maxframe/dataframe/groupby/transform.py +57 -54
maxframe/dataframe/indexing/align.py +7 -6
maxframe/dataframe/indexing/getitem.py +9 -8
maxframe/dataframe/indexing/iloc.py +28 -23
maxframe/dataframe/indexing/insert.py +7 -3
maxframe/dataframe/indexing/loc.py +9 -8
maxframe/dataframe/indexing/reindex.py +36 -30
maxframe/dataframe/indexing/rename_axis.py +18 -10
maxframe/dataframe/indexing/reset_index.py +0 -2
maxframe/dataframe/indexing/sample.py +13 -9
maxframe/dataframe/indexing/set_axis.py +9 -6
maxframe/dataframe/indexing/setitem.py +8 -5
maxframe/dataframe/indexing/where.py +12 -9
maxframe/dataframe/merge/__init__.py +0 -1
maxframe/dataframe/merge/concat.py +10 -31
maxframe/dataframe/merge/merge.py +2 -24
maxframe/dataframe/misc/__init__.py +6 -0
maxframe/dataframe/misc/_duplicate.py +7 -3
maxframe/dataframe/misc/apply.py +106 -139
maxframe/dataframe/misc/astype.py +3 -2
maxframe/dataframe/misc/case_when.py +11 -7
maxframe/dataframe/misc/cut.py +11 -10
maxframe/dataframe/misc/describe.py +7 -3
maxframe/dataframe/misc/drop.py +13 -11
maxframe/dataframe/misc/eval.py +0 -2
maxframe/dataframe/misc/get_dummies.py +78 -49
maxframe/dataframe/misc/isin.py +13 -10
maxframe/dataframe/misc/map.py +21 -6
maxframe/dataframe/misc/melt.py +8 -1
maxframe/dataframe/misc/pivot.py +232 -0
maxframe/dataframe/misc/pivot_table.py +52 -40
maxframe/dataframe/misc/rechunk.py +59 -0
maxframe/dataframe/misc/shift.py +7 -4
maxframe/dataframe/misc/stack.py +5 -3
maxframe/dataframe/misc/tests/test_misc.py +167 -1
maxframe/dataframe/misc/transform.py +63 -65
maxframe/dataframe/misc/value_counts.py +7 -4
maxframe/dataframe/missing/dropna.py +16 -7
maxframe/dataframe/missing/fillna.py +18 -10
maxframe/dataframe/missing/replace.py +10 -6
maxframe/dataframe/missing/tests/test_missing.py +2 -2
maxframe/dataframe/operators.py +1 -27
maxframe/dataframe/reduction/aggregation.py +128 -3
maxframe/dataframe/reduction/core.py +20 -6
maxframe/dataframe/reduction/median.py +1 -1
maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
maxframe/dataframe/reduction/unique.py +53 -7
maxframe/dataframe/statistics/corr.py +9 -6
maxframe/dataframe/statistics/quantile.py +9 -6
maxframe/dataframe/tseries/to_datetime.py +6 -4
maxframe/dataframe/utils.py +219 -31
maxframe/dataframe/window/rolling.py +7 -4
maxframe/env.py +1 -0
maxframe/errors.py +9 -0
maxframe/extension.py +13 -2
maxframe/io/objects/core.py +67 -51
maxframe/io/objects/tensor.py +73 -17
maxframe/io/objects/tests/test_object_io.py +10 -55
maxframe/io/odpsio/arrow.py +15 -2
maxframe/io/odpsio/schema.py +43 -13
maxframe/io/odpsio/tableio.py +63 -11
maxframe/io/odpsio/tests/test_arrow.py +1 -2
maxframe/io/odpsio/tests/test_schema.py +114 -1
maxframe/io/odpsio/tests/test_tableio.py +42 -0
maxframe/io/odpsio/tests/test_volumeio.py +21 -58
maxframe/io/odpsio/volumeio.py +23 -8
maxframe/learn/__init__.py +2 -2
maxframe/learn/contrib/__init__.py +2 -2
maxframe/learn/contrib/graph/connected_components.py +2 -1
maxframe/learn/contrib/lightgbm/__init__.py +33 -0
maxframe/learn/contrib/lightgbm/_predict.py +138 -0
maxframe/learn/contrib/lightgbm/_train.py +163 -0
maxframe/learn/contrib/lightgbm/callback.py +114 -0
maxframe/learn/contrib/lightgbm/classifier.py +199 -0
maxframe/learn/contrib/lightgbm/core.py +372 -0
maxframe/learn/contrib/lightgbm/dataset.py +153 -0
maxframe/learn/contrib/lightgbm/regressor.py +29 -0
maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
maxframe/learn/contrib/llm/models/dashscope.py +34 -0
maxframe/learn/contrib/llm/models/managed.py +15 -0
maxframe/learn/contrib/llm/multi_modal.py +92 -0
maxframe/learn/contrib/llm/text.py +21 -5
maxframe/learn/contrib/models.py +38 -9
maxframe/learn/contrib/utils.py +55 -0
maxframe/learn/contrib/xgboost/callback.py +86 -0
maxframe/learn/contrib/xgboost/classifier.py +26 -30
maxframe/learn/contrib/xgboost/core.py +54 -42
maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
maxframe/learn/contrib/xgboost/predict.py +13 -8
maxframe/learn/contrib/xgboost/regressor.py +28 -27
maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
maxframe/learn/contrib/xgboost/train.py +59 -16
maxframe/learn/core.py +252 -0
maxframe/learn/datasets/__init__.py +20 -0
maxframe/learn/datasets/samples_generator.py +628 -0
maxframe/learn/linear_model/__init__.py +15 -0
maxframe/learn/linear_model/_base.py +163 -0
maxframe/learn/linear_model/_lin_reg.py +175 -0
maxframe/learn/metrics/__init__.py +25 -0
maxframe/learn/metrics/_check_targets.py +95 -0
maxframe/learn/metrics/_classification.py +1121 -0
maxframe/learn/metrics/_regression.py +256 -0
maxframe/learn/model_selection/__init__.py +15 -0
maxframe/learn/model_selection/_split.py +451 -0
maxframe/learn/model_selection/tests/__init__.py +13 -0
maxframe/learn/model_selection/tests/test_split.py +156 -0
maxframe/learn/preprocessing/__init__.py +16 -0
maxframe/learn/preprocessing/_data/__init__.py +17 -0
maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
maxframe/learn/preprocessing/_data/normalize.py +127 -0
maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
maxframe/learn/preprocessing/_data/utils.py +79 -0
maxframe/learn/preprocessing/_label/__init__.py +16 -0
maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
maxframe/learn/utils/__init__.py +4 -0
maxframe/learn/utils/_encode.py +314 -0
maxframe/learn/utils/checks.py +161 -0
maxframe/learn/utils/core.py +33 -0
maxframe/learn/utils/extmath.py +176 -0
maxframe/learn/utils/multiclass.py +292 -0
maxframe/learn/utils/shuffle.py +114 -0
maxframe/learn/utils/sparsefuncs.py +87 -0
maxframe/learn/utils/validation.py +775 -0
maxframe/lib/__init__.py +0 -2
maxframe/lib/compat.py +145 -0
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
maxframe/lib/sparse/__init__.py +10 -15
maxframe/lib/sparse/array.py +45 -33
maxframe/lib/sparse/core.py +0 -2
maxframe/lib/sparse/linalg.py +31 -0
maxframe/lib/sparse/matrix.py +5 -2
maxframe/lib/sparse/tests/__init__.py +0 -2
maxframe/lib/sparse/tests/test_sparse.py +53 -53
maxframe/lib/sparse/vector.py +0 -2
maxframe/mixin.py +59 -2
maxframe/opcodes.py +13 -5
maxframe/protocol.py +67 -14
maxframe/remote/core.py +16 -14
maxframe/remote/run_script.py +6 -3
maxframe/serialization/__init__.py +2 -0
maxframe/serialization/core.cpython-310-darwin.so +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -1
maxframe/serialization/core.pyx +82 -4
maxframe/serialization/pandas.py +5 -1
maxframe/serialization/serializables/core.py +6 -5
maxframe/serialization/serializables/field.py +2 -2
maxframe/serialization/serializables/tests/test_field_type.py +3 -5
maxframe/serialization/tests/test_serial.py +27 -0
maxframe/session.py +4 -71
maxframe/sperunner.py +165 -0
maxframe/tensor/__init__.py +35 -2
maxframe/tensor/arithmetic/__init__.py +2 -4
maxframe/tensor/arithmetic/abs.py +0 -2
maxframe/tensor/arithmetic/absolute.py +0 -2
maxframe/tensor/arithmetic/add.py +34 -4
maxframe/tensor/arithmetic/angle.py +0 -2
maxframe/tensor/arithmetic/arccos.py +1 -4
maxframe/tensor/arithmetic/arccosh.py +1 -3
maxframe/tensor/arithmetic/arcsin.py +0 -2
maxframe/tensor/arithmetic/arcsinh.py +0 -2
maxframe/tensor/arithmetic/arctan.py +0 -2
maxframe/tensor/arithmetic/arctan2.py +0 -2
maxframe/tensor/arithmetic/arctanh.py +0 -2
maxframe/tensor/arithmetic/around.py +0 -2
maxframe/tensor/arithmetic/bitand.py +0 -2
maxframe/tensor/arithmetic/bitor.py +1 -3
maxframe/tensor/arithmetic/bitxor.py +1 -3
maxframe/tensor/arithmetic/cbrt.py +0 -2
maxframe/tensor/arithmetic/ceil.py +0 -2
maxframe/tensor/arithmetic/clip.py +13 -13
maxframe/tensor/arithmetic/conj.py +0 -2
maxframe/tensor/arithmetic/copysign.py +0 -2
maxframe/tensor/arithmetic/core.py +47 -39
maxframe/tensor/arithmetic/cos.py +1 -3
maxframe/tensor/arithmetic/cosh.py +0 -2
maxframe/tensor/arithmetic/deg2rad.py +0 -2
maxframe/tensor/arithmetic/degrees.py +0 -2
maxframe/tensor/arithmetic/divide.py +0 -2
maxframe/tensor/arithmetic/equal.py +0 -2
maxframe/tensor/arithmetic/exp.py +1 -3
maxframe/tensor/arithmetic/exp2.py +0 -2
maxframe/tensor/arithmetic/expm1.py +0 -2
maxframe/tensor/arithmetic/fabs.py +0 -2
maxframe/tensor/arithmetic/fix.py +0 -2
maxframe/tensor/arithmetic/float_power.py +0 -2
maxframe/tensor/arithmetic/floor.py +0 -2
maxframe/tensor/arithmetic/floordiv.py +0 -2
maxframe/tensor/arithmetic/fmax.py +0 -2
maxframe/tensor/arithmetic/fmin.py +0 -2
maxframe/tensor/arithmetic/fmod.py +0 -2
maxframe/tensor/arithmetic/frexp.py +6 -2
maxframe/tensor/arithmetic/greater.py +0 -2
maxframe/tensor/arithmetic/greater_equal.py +0 -2
maxframe/tensor/arithmetic/hypot.py +0 -2
maxframe/tensor/arithmetic/i0.py +1 -3
maxframe/tensor/arithmetic/imag.py +0 -2
maxframe/tensor/arithmetic/invert.py +1 -3
maxframe/tensor/arithmetic/isclose.py +0 -2
maxframe/tensor/arithmetic/iscomplex.py +0 -2
maxframe/tensor/arithmetic/isfinite.py +1 -3
maxframe/tensor/arithmetic/isinf.py +0 -2
maxframe/tensor/arithmetic/isnan.py +0 -2
maxframe/tensor/arithmetic/isreal.py +0 -2
maxframe/tensor/arithmetic/ldexp.py +0 -2
maxframe/tensor/arithmetic/less.py +0 -2
maxframe/tensor/arithmetic/less_equal.py +0 -2
maxframe/tensor/arithmetic/log.py +1 -3
maxframe/tensor/arithmetic/log10.py +1 -3
maxframe/tensor/arithmetic/log1p.py +1 -3
maxframe/tensor/arithmetic/log2.py +1 -3
maxframe/tensor/arithmetic/logaddexp.py +0 -2
maxframe/tensor/arithmetic/logaddexp2.py +0 -2
maxframe/tensor/arithmetic/logical_and.py +0 -2
maxframe/tensor/arithmetic/logical_not.py +1 -3
maxframe/tensor/arithmetic/logical_or.py +0 -2
maxframe/tensor/arithmetic/logical_xor.py +0 -2
maxframe/tensor/arithmetic/lshift.py +0 -2
maxframe/tensor/arithmetic/maximum.py +0 -2
maxframe/tensor/arithmetic/minimum.py +0 -2
maxframe/tensor/arithmetic/mod.py +0 -2
maxframe/tensor/arithmetic/modf.py +6 -2
maxframe/tensor/arithmetic/multiply.py +37 -4
maxframe/tensor/arithmetic/nan_to_num.py +0 -2
maxframe/tensor/arithmetic/negative.py +0 -2
maxframe/tensor/arithmetic/nextafter.py +0 -2
maxframe/tensor/arithmetic/not_equal.py +0 -2
maxframe/tensor/arithmetic/positive.py +0 -2
maxframe/tensor/arithmetic/power.py +0 -2
maxframe/tensor/arithmetic/rad2deg.py +0 -2
maxframe/tensor/arithmetic/radians.py +0 -2
maxframe/tensor/arithmetic/real.py +0 -2
maxframe/tensor/arithmetic/reciprocal.py +5 -3
maxframe/tensor/arithmetic/rint.py +1 -3
maxframe/tensor/arithmetic/rshift.py +0 -2
maxframe/tensor/arithmetic/setimag.py +0 -2
maxframe/tensor/arithmetic/setreal.py +0 -2
maxframe/tensor/arithmetic/sign.py +0 -2
maxframe/tensor/arithmetic/signbit.py +0 -2
maxframe/tensor/arithmetic/sin.py +0 -2
maxframe/tensor/arithmetic/sinc.py +1 -3
maxframe/tensor/arithmetic/sinh.py +0 -2
maxframe/tensor/arithmetic/spacing.py +0 -2
maxframe/tensor/arithmetic/sqrt.py +0 -2
maxframe/tensor/arithmetic/square.py +0 -2
maxframe/tensor/arithmetic/subtract.py +4 -2
maxframe/tensor/arithmetic/tan.py +0 -2
maxframe/tensor/arithmetic/tanh.py +0 -2
maxframe/tensor/arithmetic/tests/__init__.py +0 -2
maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
maxframe/tensor/arithmetic/truediv.py +0 -2
maxframe/tensor/arithmetic/trunc.py +0 -2
maxframe/tensor/arithmetic/utils.py +32 -6
maxframe/tensor/array_utils.py +3 -25
maxframe/tensor/core.py +6 -6
maxframe/tensor/datasource/__init__.py +10 -2
maxframe/tensor/datasource/arange.py +0 -2
maxframe/tensor/datasource/array.py +3 -22
maxframe/tensor/datasource/core.py +15 -10
maxframe/tensor/datasource/diag.py +140 -0
maxframe/tensor/datasource/diagflat.py +69 -0
maxframe/tensor/datasource/empty.py +0 -2
maxframe/tensor/datasource/eye.py +95 -0
maxframe/tensor/datasource/from_dataframe.py +0 -2
maxframe/tensor/datasource/from_dense.py +0 -17
maxframe/tensor/datasource/from_sparse.py +0 -2
maxframe/tensor/datasource/full.py +0 -2
maxframe/tensor/datasource/identity.py +54 -0
maxframe/tensor/datasource/indices.py +115 -0
maxframe/tensor/datasource/linspace.py +140 -0
maxframe/tensor/datasource/meshgrid.py +135 -0
maxframe/tensor/datasource/ones.py +8 -3
maxframe/tensor/datasource/tests/test_datasource.py +32 -1
maxframe/tensor/datasource/tri_array.py +107 -0
maxframe/tensor/datasource/zeros.py +7 -3
maxframe/tensor/extensions/__init__.py +31 -0
maxframe/tensor/extensions/accessor.py +25 -0
maxframe/tensor/extensions/apply_chunk.py +137 -0
maxframe/tensor/indexing/__init__.py +1 -1
maxframe/tensor/indexing/choose.py +8 -6
maxframe/tensor/indexing/compress.py +0 -2
maxframe/tensor/indexing/extract.py +0 -2
maxframe/tensor/indexing/fill_diagonal.py +9 -6
maxframe/tensor/indexing/flatnonzero.py +1 -3
maxframe/tensor/indexing/getitem.py +10 -43
maxframe/tensor/indexing/nonzero.py +2 -4
maxframe/tensor/indexing/setitem.py +19 -9
maxframe/tensor/indexing/slice.py +6 -3
maxframe/tensor/indexing/take.py +0 -2
maxframe/tensor/indexing/tests/__init__.py +0 -2
maxframe/tensor/indexing/tests/test_indexing.py +0 -2
maxframe/tensor/indexing/unravel_index.py +6 -6
maxframe/tensor/lib/__init__.py +16 -0
maxframe/tensor/lib/index_tricks.py +404 -0
maxframe/tensor/linalg/__init__.py +36 -0
maxframe/tensor/linalg/dot.py +145 -0
maxframe/tensor/linalg/inner.py +36 -0
maxframe/tensor/linalg/inv.py +83 -0
maxframe/tensor/linalg/lu.py +115 -0
maxframe/tensor/linalg/matmul.py +225 -0
maxframe/tensor/linalg/qr.py +124 -0
maxframe/tensor/linalg/solve_triangular.py +103 -0
maxframe/tensor/linalg/svd.py +167 -0
maxframe/tensor/linalg/tensordot.py +213 -0
maxframe/tensor/linalg/vdot.py +73 -0
maxframe/tensor/merge/__init__.py +4 -0
maxframe/tensor/merge/append.py +74 -0
maxframe/tensor/merge/column_stack.py +63 -0
maxframe/tensor/merge/concatenate.py +3 -2
maxframe/tensor/merge/dstack.py +71 -0
maxframe/tensor/merge/hstack.py +70 -0
maxframe/tensor/merge/stack.py +0 -2
maxframe/tensor/merge/tests/test_merge.py +0 -2
maxframe/tensor/misc/__init__.py +18 -5
maxframe/tensor/misc/astype.py +10 -8
maxframe/tensor/misc/broadcast_to.py +1 -1
maxframe/tensor/misc/copy.py +64 -0
maxframe/tensor/misc/diff.py +115 -0
maxframe/tensor/misc/flatten.py +63 -0
maxframe/tensor/misc/in1d.py +94 -0
maxframe/tensor/misc/isin.py +130 -0
maxframe/tensor/misc/ndim.py +53 -0
maxframe/tensor/misc/ravel.py +0 -2
maxframe/tensor/misc/repeat.py +129 -0
maxframe/tensor/misc/searchsorted.py +147 -0
maxframe/tensor/misc/setdiff1d.py +58 -0
maxframe/tensor/misc/squeeze.py +117 -0
maxframe/tensor/misc/swapaxes.py +113 -0
maxframe/tensor/misc/tests/test_misc.py +0 -2
maxframe/tensor/misc/transpose.py +8 -4
maxframe/tensor/misc/trapezoid.py +123 -0
maxframe/tensor/misc/unique.py +0 -1
maxframe/tensor/misc/where.py +10 -8
maxframe/tensor/operators.py +0 -34
maxframe/tensor/random/__init__.py +3 -5
maxframe/tensor/random/binomial.py +0 -2
maxframe/tensor/random/bytes.py +0 -2
maxframe/tensor/random/chisquare.py +0 -2
maxframe/tensor/random/choice.py +9 -8
maxframe/tensor/random/core.py +20 -5
maxframe/tensor/random/dirichlet.py +0 -2
maxframe/tensor/random/exponential.py +0 -2
maxframe/tensor/random/f.py +2 -4
maxframe/tensor/random/gamma.py +0 -2
maxframe/tensor/random/geometric.py +0 -2
maxframe/tensor/random/gumbel.py +0 -2
maxframe/tensor/random/hypergeometric.py +0 -2
maxframe/tensor/random/laplace.py +2 -4
maxframe/tensor/random/logistic.py +0 -2
maxframe/tensor/random/lognormal.py +0 -2
maxframe/tensor/random/logseries.py +0 -2
maxframe/tensor/random/multinomial.py +0 -2
maxframe/tensor/random/multivariate_normal.py +0 -2
maxframe/tensor/random/negative_binomial.py +0 -2
maxframe/tensor/random/noncentral_chisquare.py +0 -2
maxframe/tensor/random/noncentral_f.py +1 -3
maxframe/tensor/random/normal.py +0 -2
maxframe/tensor/random/pareto.py +0 -2
maxframe/tensor/random/permutation.py +6 -3
maxframe/tensor/random/poisson.py +0 -2
maxframe/tensor/random/power.py +0 -2
maxframe/tensor/random/rand.py +0 -2
maxframe/tensor/random/randint.py +0 -2
maxframe/tensor/random/randn.py +0 -2
maxframe/tensor/random/random_integers.py +0 -2
maxframe/tensor/random/random_sample.py +0 -2
maxframe/tensor/random/rayleigh.py +0 -2
maxframe/tensor/random/standard_cauchy.py +0 -2
maxframe/tensor/random/standard_exponential.py +0 -2
maxframe/tensor/random/standard_gamma.py +0 -2
maxframe/tensor/random/standard_normal.py +0 -2
maxframe/tensor/random/standard_t.py +0 -2
maxframe/tensor/random/tests/__init__.py +0 -2
maxframe/tensor/random/tests/test_random.py +0 -2
maxframe/tensor/random/triangular.py +0 -2
maxframe/tensor/random/uniform.py +0 -2
maxframe/tensor/random/vonmises.py +0 -2
maxframe/tensor/random/wald.py +0 -2
maxframe/tensor/random/weibull.py +0 -2
maxframe/tensor/random/zipf.py +0 -2
maxframe/tensor/reduction/__init__.py +0 -2
maxframe/tensor/reduction/all.py +0 -2
maxframe/tensor/reduction/allclose.py +0 -2
maxframe/tensor/reduction/any.py +0 -2
maxframe/tensor/reduction/argmax.py +1 -3
maxframe/tensor/reduction/argmin.py +1 -3
maxframe/tensor/reduction/array_equal.py +0 -2
maxframe/tensor/reduction/core.py +0 -2
maxframe/tensor/reduction/count_nonzero.py +0 -2
maxframe/tensor/reduction/cumprod.py +0 -2
maxframe/tensor/reduction/cumsum.py +0 -2
maxframe/tensor/reduction/max.py +0 -2
maxframe/tensor/reduction/mean.py +0 -2
maxframe/tensor/reduction/min.py +0 -2
maxframe/tensor/reduction/nanargmax.py +0 -2
maxframe/tensor/reduction/nanargmin.py +0 -2
maxframe/tensor/reduction/nancumprod.py +0 -2
maxframe/tensor/reduction/nancumsum.py +0 -2
maxframe/tensor/reduction/nanmax.py +0 -2
maxframe/tensor/reduction/nanmean.py +0 -2
maxframe/tensor/reduction/nanmin.py +0 -2
maxframe/tensor/reduction/nanprod.py +0 -2
maxframe/tensor/reduction/nanstd.py +0 -2
maxframe/tensor/reduction/nansum.py +0 -2
maxframe/tensor/reduction/nanvar.py +0 -2
maxframe/tensor/reduction/prod.py +0 -2
maxframe/tensor/reduction/std.py +0 -2
maxframe/tensor/reduction/sum.py +0 -2
maxframe/tensor/reduction/tests/test_reduction.py +1 -4
maxframe/tensor/reduction/var.py +0 -2
maxframe/tensor/reshape/__init__.py +0 -2
maxframe/tensor/reshape/reshape.py +6 -5
maxframe/tensor/reshape/tests/__init__.py +0 -2
maxframe/tensor/reshape/tests/test_reshape.py +0 -2
maxframe/tensor/sort/__init__.py +16 -0
maxframe/tensor/sort/argsort.py +150 -0
maxframe/tensor/sort/sort.py +295 -0
maxframe/tensor/special/__init__.py +37 -0
maxframe/tensor/special/core.py +38 -0
maxframe/tensor/special/misc.py +142 -0
maxframe/tensor/special/statistical.py +56 -0
maxframe/tensor/statistics/__init__.py +5 -0
maxframe/tensor/statistics/average.py +143 -0
maxframe/tensor/statistics/bincount.py +133 -0
maxframe/tensor/statistics/quantile.py +10 -8
maxframe/tensor/ufunc/__init__.py +0 -2
maxframe/tensor/ufunc/ufunc.py +0 -2
maxframe/tensor/utils.py +21 -3
maxframe/tests/test_protocol.py +3 -3
maxframe/tests/test_utils.py +210 -1
maxframe/tests/utils.py +59 -1
maxframe/udf.py +76 -6
maxframe/utils.py +418 -17
{maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
maxframe-2.0.0.dist-info/RECORD +939 -0
{maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
maxframe_client/clients/framedriver.py +19 -3
maxframe_client/fetcher.py +113 -6
maxframe_client/session/odps.py +173 -38
maxframe_client/session/task.py +3 -1
maxframe_client/tests/test_session.py +41 -5
maxframe-1.3.0.dist-info/RECORD +0 -705
{maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0

maxframe/learn/preprocessing/_data/standard_scaler.py ADDED

@@ -0,0 +1,503 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from .... import tensor as mt
+from ....core import ENTITY_TYPE
+from ....lib import sparse
+from ...core import BaseEstimator, TransformerMixin
+from ...utils.extmath import _incremental_mean_and_var
+from ...utils.validation import (
+    FLOAT_DTYPES,
+    _check_sample_weight,
+    check_array,
+    check_is_fitted,
+)
+from .utils import _handle_zeros_in_scale, _is_constant_feature
class StandardScaler(TransformerMixin, BaseEstimator):
    """Standardize features by removing the mean and scaling to unit variance.

    The standard score of a sample `x` is calculated as:

    .. code-block:: text

        z = (x - u) / s

    where `u` is the mean of the training samples or zero if `with_mean=False`,
    and `s` is the standard deviation of the training samples or one if
    `with_std=False`.

    Centering and scaling happen independently on each feature by computing
    the relevant statistics on the samples in the training set. Mean and
    standard deviation are then stored to be used on later data using
    :meth:`transform`.

    Standardization of a dataset is a common requirement for many
    machine learning estimators: they might behave badly if the
    individual features do not more or less look like standard normally
    distributed data (e.g. Gaussian with 0 mean and unit variance).

    For instance many elements used in the objective function of
    a learning algorithm (such as the RBF kernel of Support Vector
    Machines or the L1 and L2 regularizers of linear models) assume that
    all features are centered around 0 and have variance in the same
    order. If a feature has a variance that is orders of magnitude larger
    than others, it might dominate the objective function and make the
    estimator unable to learn from other features correctly as expected.

    `StandardScaler` is sensitive to outliers, and the features may scale
    differently from each other in the presence of outliers. For an example
    visualization, refer to :ref:`Compare StandardScaler with other scalers
    <plot_all_scaling_standard_scaler_section>`.

    This scaler can also be applied to sparse CSR or CSC matrices by passing
    `with_mean=False` to avoid breaking the sparsity structure of the data.

    .. note::
        In this implementation :meth:`partial_fit`, :meth:`transform` and
        :meth:`inverse_transform` raise ``NotImplementedError`` when the
        validated input is sparse.

    Read more in the :ref:`User Guide <preprocessing_scaler>`.

    Parameters
    ----------
    copy : bool, default=True
        If False, try to avoid a copy and do inplace scaling instead.
        This is not guaranteed to always work inplace; e.g. if the data is
        not a NumPy array or scipy.sparse CSR matrix, a copy may still be
        returned.

    with_mean : bool, default=True
        If True, center the data before scaling.
        This does not work (and will raise an exception) when attempted on
        sparse matrices, because centering them entails building a dense
        matrix which in common use cases is likely to be too large to fit in
        memory.

    with_std : bool, default=True
        If True, scale the data to unit variance (or equivalently,
        unit standard deviation).

    Attributes
    ----------
    scale_ : ndarray of shape (n_features,) or None
        Per feature relative scaling of the data to achieve zero mean and unit
        variance. Generally this is calculated using `np.sqrt(var_)`. If a
        variance is zero, we can't achieve unit variance, and the data is left
        as-is, giving a scaling factor of 1. `scale_` is equal to `None`
        when `with_std=False`.

    mean_ : ndarray of shape (n_features,) or None
        The mean value for each feature in the training set.
        Equal to ``None`` when ``with_mean=False`` and ``with_std=False``.

    var_ : ndarray of shape (n_features,) or None
        The variance for each feature in the training set. Used to compute
        `scale_`. Equal to ``None`` when ``with_mean=False`` and
        ``with_std=False``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    n_samples_seen_ : int or ndarray of shape (n_features,)
        The number of samples processed by the estimator for each feature.
        If `sample_weights` are used it holds floats that sum the weights
        seen so far. Will be reset on new calls to fit, but increments
        across ``partial_fit`` calls. This implementation initialises the
        counter as a tensor of shape (n_features,) and does not collapse it
        to a scalar when no values are missing.

    See Also
    --------
    scale : Equivalent function without the estimator API.

    :class:`~sklearn.decomposition.PCA` : Further removes the linear
        correlation across features with 'whiten=True'.

    Notes
    -----
    NaNs are treated as missing values: disregarded in fit, and maintained in
    transform.

    We use a biased estimator for the standard deviation, equivalent to
    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to
    affect model performance.

    Examples
    --------
    >>> from maxframe.learn.preprocessing import StandardScaler
    >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]
    >>> scaler = StandardScaler()
    >>> print(scaler.fit(data))
    StandardScaler()
    >>> print(scaler.mean_.execute())
    [0.5 0.5]
    >>> print(scaler.transform(data).execute())
    [[-1. -1.]
     [-1. -1.]
     [ 1.  1.]
     [ 1.  1.]]
    >>> print(scaler.transform([[2, 2]]).execute())
    [[3. 3.]]
    """

    def __init__(self, *, copy=True, with_mean=True, with_std=True):
        self.with_mean = with_mean
        self.with_std = with_std
        self.copy = copy

    def _reset(self):
        """Reset internal data-dependent state of the scaler, if necessary.

        __init__ parameters are not touched.
        """
        # Each attribute is checked on its own: ``partial_fit`` creates
        # ``n_samples_seen_`` (and ``mean_``/``var_``) before ``scale_``, so
        # a call that raised part-way can leave some of them behind. Stale
        # leftovers would make the next ``fit`` look like a continuation.
        for attr in ("scale_", "n_samples_seen_", "mean_", "var_"):
            if hasattr(self, attr):
                delattr(self, attr)

    def fit(
        self,
        X,
        y=None,
        sample_weight=None,
        execute=False,
        session=None,
        run_kwargs=None,
    ) -> "StandardScaler":
        """Compute the mean and std to be used for later scaling.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        execute : bool, default=False
            Forwarded to :meth:`partial_fit`; if True the estimator's
            ``execute`` method is called before returning.

        session : object, default=None
            Forwarded as the ``session`` argument of ``execute``.

        run_kwargs : dict, default=None
            Extra keyword arguments forwarded to ``execute``.

        Returns
        -------
        self : object
            Fitted scaler.
        """
        # Reset internal state before fitting
        self._reset()
        return self.partial_fit(
            X, y, sample_weight, execute=execute, session=session, run_kwargs=run_kwargs
        )

    def partial_fit(
        self,
        X,
        y=None,
        sample_weight=None,
        execute=False,
        session=None,
        run_kwargs=None,
    ) -> "StandardScaler":
        """Online computation of mean and std on X for later scaling.

        All of X is processed as a single batch. This is intended for cases
        when :meth:`fit` is not feasible due to very large number of
        `n_samples` or because X is read from a continuous stream.

        The algorithm for incremental mean and std is given in Equation 1.5a,b
        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. "Algorithms
        for computing the sample variance: Analysis and recommendations."
        The American Statistician 37.3 (1983): 242-247:

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        execute : bool, default=False
            If True, call ``self.execute`` before returning.

        session : object, default=None
            Forwarded as the ``session`` argument of ``execute``.

        run_kwargs : dict, default=None
            Extra keyword arguments forwarded to ``execute``.

        Returns
        -------
        self : object
            Fitted scaler.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        first_call = not hasattr(self, "n_samples_seen_")
        X = self._validate_data(
            X,
            accept_sparse=("csr", "csc"),
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
            reset=first_call,
        )
        n_features = X.shape[1]

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        # Reject sparse input before any fitted attribute is created so a
        # failed call does not leave a half-initialised estimator behind.
        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # The per-feature sample counter holds integers for unweighted data
        # and weight sums (in the dtype of X) when sample weights are given.
        # NOTE: unlike scikit-learn, the counter is never collapsed to a
        # scalar here; it is created with shape (n_features,).
        dtype = np.int64 if sample_weight is None else X.dtype
        if not hasattr(self, "n_samples_seen_"):
            self.n_samples_seen_ = mt.zeros(n_features, dtype=dtype)

        # First pass
        if not hasattr(self, "scale_"):
            self.mean_ = 0.0
            self.var_ = 0.0 if self.with_std else None

        if not self.with_mean and not self.with_std:
            # No statistics requested: only count the non-NaN samples.
            self.mean_ = None
            self.var_ = None
            self.n_samples_seen_ += X.shape[0] - mt.isnan(X).sum(axis=0)
        else:
            # Even in the case of `with_mean=False`, we update the mean anyway
            # This is needed for the incremental computation of the var
            self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(
                X,
                self.mean_,
                self.var_,
                self.n_samples_seen_,
                sample_weight=sample_weight,
            )

        if self.with_std:
            # Extract the list of near constant features on the raw variances,
            # before taking the square root.
            constant_mask = _is_constant_feature(
                self.var_, self.mean_, self.n_samples_seen_
            )
            self.scale_ = _handle_zeros_in_scale(
                mt.sqrt(self.var_), copy=False, constant_mask=constant_mask
            )
        else:
            self.scale_ = None

        if execute:
            self.execute(session=session, **(run_kwargs or dict()))
        return self

    def transform(self, X, copy=None, execute=False, session=None, run_kwargs=None):
        """Perform standardization by centering and scaling.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to scale along the features axis.

        copy : bool, default=None
            Copy the input X or not. ``None`` falls back to ``self.copy``.

        execute : bool, default=False
            If True, call ``execute`` on the transformed data before
            returning it.

        session : object, default=None
            Forwarded as the ``session`` argument of ``execute``.

        run_kwargs : dict, default=None
            Extra keyword arguments forwarded to ``execute``.

        Returns
        -------
        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Transformed array.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        check_is_fitted(self)

        copy = copy if copy is not None else self.copy
        X = self._validate_data(
            X,
            reset=False,
            accept_sparse="csr",
            copy=copy,
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
        )

        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # Center first, then scale: z = (x - mean_) / scale_.
        if self.with_mean:
            X -= self.mean_
        if self.with_std:
            X /= self.scale_

        if execute:
            X.execute(session=session, **(run_kwargs or dict()))
        return X

    def inverse_transform(
        self, X, copy=None, execute=False, session=None, run_kwargs=None
    ):
        """Scale back the data to the original representation.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to scale along the features axis.

        copy : bool, default=None
            Copy the input X or not. ``None`` falls back to ``self.copy``.

        execute : bool, default=False
            If True, call ``execute`` on the restored data before
            returning it.

        session : object, default=None
            Forwarded as the ``session`` argument of ``execute``.

        run_kwargs : dict, default=None
            Extra keyword arguments forwarded to ``execute``.

        Returns
        -------
        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Transformed array.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        check_is_fitted(self)

        copy = copy if copy is not None else self.copy
        X = check_array(
            X,
            accept_sparse="csr",
            copy=copy,
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
        )

        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # Undo ``transform`` in reverse order: x = z * scale_ + mean_.
        if self.with_std:
            X *= self.scale_
        if self.with_mean:
            X += self.mean_

        if execute:
            X.execute(session=session, **(run_kwargs or dict()))
        return X
def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
    """Standardize a dataset along any axis.

    Center to the mean and component wise scale to unit variance.

    Read more in the :ref:`User Guide <preprocessing_scaler>`.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The data to center and scale.

    axis : {0, 1}, default=0
        Axis used to compute the means and standard deviations along. If 0,
        independently standardize each feature, otherwise (if 1) standardize
        each sample.

    with_mean : bool, default=True
        If True, center the data before scaling.

    with_std : bool, default=True
        If True, scale the data to unit variance (or equivalently,
        unit standard deviation).

    copy : bool, default=True
        If False, try to avoid a copy and scale in place.
        This is not guaranteed to always work in place; e.g. if the data is
        a numpy array with an int dtype, a copy will be returned even with
        copy=False.

    Returns
    -------
    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
        The transformed data.

    Raises
    ------
    ValueError
        If `axis` is neither 0 nor 1.

    See Also
    --------
    StandardScaler : Performs scaling to unit variance using the Transformer
        API (e.g. as part of a preprocessing
        :class:`~sklearn.pipeline.Pipeline`).

    Notes
    -----
    This implementation will refuse to center scipy.sparse matrices
    since it would make them non-sparse and would potentially crash the
    program with memory exhaustion problems.

    Instead the caller is expected to either set explicitly
    `with_mean=False` (in that case, only variance scaling will be
    performed on the features of the CSC matrix) or to call `X.toarray()`
    if he/she expects the materialized dense array to fit in memory.

    To avoid memory copy the caller should pass a CSC matrix.

    NaNs are treated as missing values: disregarded to compute the statistics,
    and maintained during the data transformation.

    We use a biased estimator for the standard deviation, equivalent to
    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to
    affect model performance.

    For a comparison of the different scalers, transformers, and normalizers,
    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.

    .. warning:: Risk of data leak

        Do not use :func:`~sklearn.preprocessing.scale` unless you know
        what you are doing. A common mistake is to apply it to the entire data
        *before* splitting into training and test sets. This will bias the
        model evaluation because information would have leaked from the test
        set to the training set.
        In general, we recommend using
        :class:`~sklearn.preprocessing.StandardScaler` within a
        :ref:`Pipeline <pipeline>` in order to prevent most risks of data
        leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.

    Examples
    --------
    >>> from maxframe.learn.preprocessing import scale
    >>> X = [[-2, 1, 2], [-1, 0, 1]]
    >>> scale(X, axis=0).execute()  # scaling each column independently
    array([[-1.,  1.,  1.],
           [ 1., -1., -1.]])
    >>> scale(X, axis=1).execute()  # scaling each row independently
    array([[-1.37...,  0.39...,  0.98...],
           [-1.22...,  0.     ,  1.22...]])
    """
    # Only the two documented axes are supported. Without this check any
    # other value (e.g. -1 or 2) would silently be treated as axis 0 and
    # standardize along the wrong direction.
    if axis not in (0, 1):
        raise ValueError(f"axis must be 0 or 1, got {axis!r}")

    if not isinstance(X, ENTITY_TYPE):
        X = mt.tensor(X)

    # 1-D input is handled as a single feature column and flattened again
    # before returning.
    ndim = X.ndim
    if ndim == 1:
        X = X.reshape((X.shape[0], 1))

    # StandardScaler always works per column, so row-wise scaling is done on
    # the transpose and transposed back afterwards.
    if axis == 1:
        X = X.T

    scaler = StandardScaler(with_mean=with_mean, with_std=with_std, copy=copy)
    transformed = scaler.fit_transform(X)

    if axis == 1:
        transformed = transformed.T
    if ndim == 1:
        transformed = transformed.reshape(transformed.shape[0])
    return transformed

maxframe/learn/preprocessing/_data/utils.py ADDED Viewed

@@ -0,0 +1,79 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from .... import tensor as mt
+from ....tensor.core import TENSOR_TYPE
+def _handle_zeros_in_scale(scale, copy=True, constant_mask=None):
+    """Set scales of near constant features to 1.
+    The goal is to avoid division by very small or zero values.
+    Near constant features are detected automatically by identifying
+    scales close to machine precision unless they are precomputed by
+    the caller and passed with the `constant_mask` kwarg.
+    Typically for standard scaling, the scales are the standard
+    deviation while near constant features are better detected on the
+    computed variances which are closer to machine precision by
+    construction.
+    """
+    # if we are fitting on 1D arrays, scale might be a scalar
+    if np.isscalar(scale):  # pragma: no cover
+        if scale == 0.0:
+            scale = 1.0
+        return scale
+    elif hasattr(scale, "ndim") and scale.ndim == 0:  # pragma: no cover
+        # scalar that is tensor
+        return mt.where(scale == 0.0, 1.0, scale)
+    elif isinstance(scale, (np.ndarray, TENSOR_TYPE)):
+        if copy:
+            # New array to avoid side-effects
+            scale = scale.copy()
+        if constant_mask is None:
+            constant_mask = scale < 10 * np.finfo(scale.dtype).eps
+        scale[constant_mask] = 1.0
+        return scale
def _get_real_sample_size(tileable):
    """Return the size of the first axis of `tileable` as a tensor.

    When ``tileable.shape[0]`` is known (not NaN) it is wrapped with
    ``mt.asarray``. Otherwise the tileable is indexed with a full slice on
    the first axis and ``0`` on every other axis, ``get_real_shape`` is
    handed to ``.mf.apply_chunk`` and the results are summed.
    """
    n_rows = tileable.shape[0]
    if np.isnan(n_rows):

        def get_real_shape(s):
            return s.shape[0]

        # Full slice on axis 0, index 0 on each remaining axis.
        first_column = [slice(None, None, None)]
        first_column.extend(0 for _ in range(tileable.ndim - 1))

        # presumably apply_chunk evaluates the callable once per chunk, so
        # the sum adds up per-chunk row counts — confirm against its docs
        per_chunk = tileable[first_column].mf.apply_chunk(
            get_real_shape, shape=(np.nan,)
        )
        return per_chunk.sum()

    return mt.asarray(n_rows)
+def _is_constant_feature(var, mean, n_samples):
+    """Detect if a feature is indistinguishable from a constant feature.
+    The detection is based on its computed variance and on the theoretical
+    error bounds of the '2 pass algorithm' for variance computation.
+    See "Algorithms for computing the sample variance: analysis and
+    recommendations", by Chan, Golub, and LeVeque.
+    """
+    # In scikit-learn, variance is always computed using float64 accumulators.
+    eps = np.finfo(np.float64).eps
+    upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2
+    return var <= upper_bound

maxframe/learn/preprocessing/_label/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ._label_binarizer import LabelBinarizer, _label_binarize, label_binarize
+from ._label_encoder import LabelEncoder