maxframe 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +62 -9
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +128 -3
- maxframe/dataframe/reduction/core.py +20 -6
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/models/dashscope.py +34 -0
- maxframe/learn/contrib/llm/models/managed.py +15 -0
- maxframe/learn/contrib/llm/multi_modal.py +92 -0
- maxframe/learn/contrib/llm/text.py +21 -5
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +13 -8
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
- maxframe-2.0.0.dist-info/RECORD +939 -0
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.0.dist-info/RECORD +0 -705
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from ... import tensor as mt
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Use at least float64 for the accumulating functions to avoid precision issue
|
|
21
|
+
# see https://github.com/numpy/numpy/issues/9393. The float64 is also retained
|
|
22
|
+
# as it is in case the float overflows
|
|
23
|
+
def _safe_accumulator_op(op, x, *args, **kwargs):
|
|
24
|
+
"""
|
|
25
|
+
This function provides numpy accumulator functions with a float64 dtype
|
|
26
|
+
when used on a floating point input. This prevents accumulator overflow on
|
|
27
|
+
smaller floating point dtypes.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
op : function
|
|
32
|
+
A accumulator function such as np.mean or np.sum
|
|
33
|
+
x : numpy array
|
|
34
|
+
A tensor to apply the accumulator function
|
|
35
|
+
*args : positional arguments
|
|
36
|
+
Positional arguments passed to the accumulator function after the
|
|
37
|
+
input x
|
|
38
|
+
**kwargs : keyword arguments
|
|
39
|
+
Keyword arguments passed to the accumulator function
|
|
40
|
+
|
|
41
|
+
Returns
|
|
42
|
+
-------
|
|
43
|
+
result : The output of the accumulator function passed to this function
|
|
44
|
+
"""
|
|
45
|
+
if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
|
|
46
|
+
result = op(x, *args, **kwargs, dtype=np.float64)
|
|
47
|
+
else:
|
|
48
|
+
result = op(x, *args, **kwargs)
|
|
49
|
+
return result
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _incremental_mean_and_var(
    X, last_mean, last_variance, last_sample_count, sample_weight=None
):
    """Calculate mean update and a Youngs and Cramer variance update.

    If sample_weight is given, the weighted mean and variance are computed.

    Update a given mean and (possibly) variance according to new data given
    in X. last_mean is always required to compute the new mean.
    If last_variance is None, no variance is computed and None is returned
    for updated_variance.

    From the paper "Algorithms for computing the sample variance: analysis and
    recommendations", by Chan, Golub, and LeVeque.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to use for variance update.

    last_mean : array-like of shape (n_features,)

    last_variance : array-like of shape (n_features,)

    last_sample_count : array-like of shape (n_features,)
        The number of samples encountered until now if sample_weight is None.
        If sample_weight is not None, this is the sum of sample_weight
        encountered.

    sample_weight : array-like of shape (n_samples,) or None
        Sample weights. If None, compute the unweighted mean/variance.

    Returns
    -------
    updated_mean : ndarray of shape (n_features,)

    updated_variance : ndarray of shape (n_features,)
        None if last_variance was None.

    updated_sample_count : ndarray of shape (n_features,)

    Notes
    -----
    NaNs are ignored during the algorithm.

    The computation is expressed with ``mt`` (the package's tensor module)
    operations, so the returned values are presumably tensors of that module
    rather than plain ndarrays -- TODO confirm against callers.

    References
    ----------
    T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample
        variance: recommendations, The American Statistician, Vol. 37, No. 3,
        pp. 242-247

    Also, see the sparse implementation of this in
    `utils.sparsefuncs.incr_mean_variance_axis` and
    `utils.sparsefuncs_fast.incr_mean_variance_axis0`
    """
    # old = stats until now
    # new = the current increment
    # updated = the aggregated stats

    # recover the running sum from the running mean and count
    last_sum = last_mean * last_sample_count
    X_nan_mask = mt.isnan(X)
    # NOTE: the data-dependent choice between sum and nansum is disabled and
    # nansum is always used (presumably because the mask is not a concrete
    # value at this point -- TODO confirm).
    # if mt.any(X_nan_mask):
    #     sum_op = mt.nansum
    # else:
    #     sum_op = mt.sum
    sum_op = mt.nansum
    if sample_weight is not None:
        # equivalent to np.nansum(X * sample_weight, axis=0)
        # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
        # NaNs are replaced by 0 so they add nothing to the weighted sum
        new_sum = _safe_accumulator_op(
            mt.matmul, sample_weight, mt.where(X_nan_mask, 0, X)
        )
        # per-feature total weight of the non-NaN entries
        new_sample_count = _safe_accumulator_op(
            mt.sum, sample_weight[:, None] * (~X_nan_mask), axis=0
        )
    else:
        new_sum = _safe_accumulator_op(sum_op, X, axis=0)
        n_samples = X.shape[0]
        # per-feature number of non-NaN entries
        new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)

    updated_sample_count = last_sample_count + new_sample_count

    updated_mean = (last_sum + new_sum) / updated_sample_count

    if last_variance is None:
        updated_variance = None
    else:
        # T is the mean of the new batch alone; temp holds the deviations
        T = new_sum / new_sample_count
        temp = X - T
        if sample_weight is not None:
            # equivalent to np.nansum((X-T)**2 * sample_weight, axis=0)
            # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
            correction = _safe_accumulator_op(
                mt.matmul, sample_weight, mt.where(X_nan_mask, 0, temp)
            )
            # order matters: `correction` must be taken before squaring
            temp **= 2
            new_unnormalized_variance = _safe_accumulator_op(
                mt.matmul, sample_weight, mt.where(X_nan_mask, 0, temp)
            )
        else:
            correction = _safe_accumulator_op(sum_op, temp, axis=0)
            # order matters: `correction` must be taken before squaring
            temp **= 2
            new_unnormalized_variance = _safe_accumulator_op(sum_op, temp, axis=0)

        # correction term of the corrected 2 pass algorithm.
        # See "Algorithms for computing the sample variance: analysis
        # and recommendations", by Chan, Golub, and LeVeque.
        new_unnormalized_variance -= correction**2 / new_sample_count

        last_unnormalized_variance = last_variance * last_sample_count

        # divisions below hit 0 when either count is 0; warnings are silenced
        # and the affected entries are overwritten afterwards
        with mt.errstate(divide="ignore", invalid="ignore"):
            last_over_new_count = last_sample_count / new_sample_count
            updated_unnormalized_variance = (
                last_unnormalized_variance
                + new_unnormalized_variance
                + last_over_new_count
                / updated_sample_count
                * (last_sum / last_over_new_count - new_sum) ** 2
            )

        # with no previous samples the combined variance is just the new one
        zeros = last_sample_count == 0
        updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
        updated_variance = updated_unnormalized_variance / updated_sample_count

    return updated_mean, updated_variance, updated_sample_count
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from collections.abc import Sequence
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from scipy.sparse import spmatrix
|
|
20
|
+
|
|
21
|
+
from ... import opcodes
|
|
22
|
+
from ... import tensor as mt
|
|
23
|
+
from ...core import ENTITY_TYPE, TILEABLE_TYPE, OutputType
|
|
24
|
+
from ...core.operator import Operator
|
|
25
|
+
from ...serialization.serializables import AnyField, ListField
|
|
26
|
+
from ...tensor.core import TENSOR_TYPE, TensorOrder
|
|
27
|
+
from ...typing_ import EntityType, TileableType
|
|
28
|
+
from ...udf import builtin_function
|
|
29
|
+
from ..core import LearnOperatorMixin
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class UniqueLabels(Operator, LearnOperatorMixin):
    """Operator producing a 1-d tensor of unknown length from label inputs."""

    _op_type_ = opcodes.UNIQUE_LABELS

    # the label collections this operator was created for
    ys = ListField("ys")

    def __call__(self, ys: List[TileableType]):
        """
        Create the output tileable for the given label collections.

        Only the members of ``ys`` that are tileables are registered as
        inputs. The output takes its dtype from the first element of ``ys``
        and has shape ``(nan,)``.
        """
        self._output_types = [OutputType.tensor]
        tileable_inputs = []
        for candidate in ys:
            if isinstance(candidate, TILEABLE_TYPE):
                tileable_inputs.append(candidate)
        out_dtype = mt.tensor(ys[0]).dtype
        return self.new_tileable(
            tileable_inputs,
            shape=(np.nan,),
            dtype=out_dtype,
            order=TensorOrder.C_ORDER,
        )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def unique_labels(*ys):
    """
    Extract an ordered array of unique labels.

    The following combinations are not allowed:
        - a mix of multilabel and multiclass (single label) targets
        - a mix of label indicator matrix and anything else
          (because there are no explicit labels)
        - a mix of label indicator matrices of different sizes
        - a mix of string and integer labels

    "multiclass-multioutput" input is currently not allowed either.

    Parameters
    ----------
    *ys : array-likes
        One or more collections of labels.

    Returns
    -------
    out : tensor of shape (n_unique_labels,)
        An ordered array of unique labels.

    Raises
    ------
    ValueError
        If called without any argument.

    Examples
    --------
    >>> from maxframe.learn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7]).execute()
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4]).execute()
    array([1, 2, 3, 4])
    >>> unique_labels([1, 2, 10], [5, 11]).execute()
    array([ 1, 2, 5, 10, 11])
    """
    if len(ys) == 0:
        raise ValueError("No argument has been passed.")

    label_sets = [*ys]
    return UniqueLabels(ys=label_sets)(label_sets)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class IsMultilabel(Operator, LearnOperatorMixin):
    """Operator yielding a scalar boolean tensor for a target ``y``."""

    _op_type_ = opcodes.IS_MULTILABEL

    # the target being inspected; may or may not be an entity
    y = AnyField("y")

    def __call__(self, y):
        """Create the scalar boolean output; ``y`` is an input only if it is an entity."""
        self._output_types = [OutputType.tensor]
        inputs = []
        if isinstance(y, ENTITY_TYPE):
            inputs.append(y)
        return self.new_tileable(
            inputs, shape=(), dtype=np.dtype(bool), order=TensorOrder.C_ORDER
        )

    @classmethod
    def _set_inputs(cls, op: "IsMultilabel", inputs: List[EntityType]):
        """Re-point ``op.y`` at the first registered input, when there is one."""
        super()._set_inputs(op, inputs)
        if not op._inputs:
            return
        op.y = op._inputs[0]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def is_multilabel(y):
    """
    Check if ``y`` is in a multilabel format.

    Parameters
    ----------
    y : array-like or tensor
        Target values.

    Returns
    -------
    out : scalar boolean tensor
        Evaluates to ``True`` if ``y`` is in a multilabel format, else
        ``False``.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> from maxframe.learn.utils.multiclass import is_multilabel
    >>> is_multilabel([0, 1, 0, 1]).execute()
    False
    >>> is_multilabel([[1], [0, 2], []]).execute()
    False
    >>> is_multilabel(mt.array([[1, 0], [0, 0]])).execute()
    True
    >>> is_multilabel(mt.array([[1], [0], [0]])).execute()
    False
    >>> is_multilabel(mt.array([[1, 0, 0]])).execute()
    True
    """
    if not isinstance(y, ENTITY_TYPE):
        if hasattr(y, "__array__") or isinstance(y, Sequence):
            try:
                y = np.asarray(y)
            except ValueError:
                # NumPy >= 1.24 refuses to build an array from a ragged
                # nested sequence (see NEP 34) unless dtype=object is given
                # explicitly; fall back so that inputs such as
                # [[1], [0, 2], []] are still accepted.
                y = np.asarray(y, dtype=object)
        # non-entity targets travel inside the operator, not as an input
        yt = None
    else:
        yt = y = mt.tensor(y)

    op = IsMultilabel(y=y)
    return op(yt)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class TypeOfTarget(Operator, LearnOperatorMixin):
    """Operator yielding a scalar object-dtype tensor describing a target ``y``."""

    _op_type_ = opcodes.TYPE_OF_TARGET

    # the target being classified; may or may not be an entity
    y = AnyField("y")

    def __call__(self, y: TileableType):
        """Create the scalar output; ``y`` is an input only if it is an entity."""
        self._output_types = [OutputType.tensor]
        inputs = []
        if isinstance(y, ENTITY_TYPE):
            inputs.append(y)
        return self.new_tileable(
            inputs, shape=(), order=TensorOrder.C_ORDER, dtype=np.dtype(object)
        )

    @classmethod
    def _set_inputs(cls, op: "TypeOfTarget", inputs: List[EntityType]):
        """Re-point ``op.y`` at the first registered input, when there is one."""
        super()._set_inputs(op, inputs)
        if not op._inputs:
            return
        op.y = op._inputs[0]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def type_of_target(y):
    """
    Determine the type of data indicated by the target.

    The reported type is the most specific one that can be inferred.
    For example:

        * ``binary`` is more specific but compatible with ``multiclass``.
        * ``multiclass`` of integers is more specific but compatible with
          ``continuous``.
        * ``multilabel-indicator`` is more specific but compatible with
          ``multiclass-multioutput``.

    Parameters
    ----------
    y : array-like
        A tensor, an object exposing ``__array__`` or
        ``__maxframe_tensor__``, a scipy sparse matrix, or a non-string
        sequence.

    Returns
    -------
    target_type : string
        One of:

        * 'continuous': `y` is an array-like of floats that are not all
          integers, and is 1d or a column vector.
        * 'continuous-multioutput': `y` is a 2d tensor of floats that are
          not all integers, and both dimensions are of size > 1.
        * 'binary': `y` contains <= 2 discrete values and is 1d or a column
          vector.
        * 'multiclass': `y` contains more than two discrete values, is not a
          sequence of sequences, and is 1d or a column vector.
        * 'multiclass-multioutput': `y` is a 2d tensor that contains more
          than two discrete values, is not a sequence of sequences, and both
          dimensions are of size > 1.
        * 'multilabel-indicator': `y` is a label indicator matrix, a tensor
          of two dimensions with at least two columns, and at most 2 unique
          values.
        * 'unknown': `y` is array-like but none of the above, such as a 3d
          tensor, sequence of sequences, or a tensor of non-sequence objects.

    Raises
    ------
    ValueError
        If ``y`` is a string or not array-like, or if its class is named
        'SparseSeries' or 'SparseArray'.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> from maxframe.learn.utils.multiclass import type_of_target
    >>> type_of_target([0.1, 0.6]).execute()
    'continuous'
    >>> type_of_target([1, -1, -1, 1]).execute()
    'binary'
    >>> type_of_target(['a', 'b', 'a']).execute()
    'binary'
    >>> type_of_target([1.0, 2.0]).execute()
    'binary'
    >>> type_of_target([1, 0, 2]).execute()
    'multiclass'
    >>> type_of_target([1.0, 0.0, 3.0]).execute()
    'multiclass'
    >>> type_of_target(['a', 'b', 'c']).execute()
    'multiclass'
    >>> type_of_target(mt.array([[1, 2], [3, 1]])).execute()
    'multiclass-multioutput'
    >>> type_of_target([[1, 2]]).execute()
    'multiclass-multioutput'
    >>> type_of_target(mt.array([[1.5, 2.0], [3.0, 1.6]])).execute()
    'continuous-multioutput'
    >>> type_of_target(mt.array([[0, 1], [1, 1]])).execute()
    'multilabel-indicator'
    """
    if isinstance(y, TENSOR_TYPE):
        y = mt.tensor(y)

    if spmatrix is not None:
        accepted_types = (Sequence, spmatrix)
    else:
        accepted_types = (Sequence,)
    looks_array_like = (
        isinstance(y, accepted_types)
        or hasattr(y, "__array__")
        or hasattr(y, "__maxframe_tensor__")
    )
    # strings are sequences too, but never a valid target
    if isinstance(y, str) or not looks_array_like:
        raise ValueError(f"Expected array-like (array or non-string sequence), got {y}")

    if type(y).__name__ in ("SparseSeries", "SparseArray"):  # pragma: no cover
        raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")

    if isinstance(y, ENTITY_TYPE):
        y = mt.tensor(y)

    return TypeOfTarget(y=y)(y)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
@builtin_function
def _check_class_target_name_and_return(t, t_type=None):
    """
    Return ``t`` unchanged after validating a classification target type.

    The name that gets validated is ``t_type`` when it is truthy, otherwise
    ``t`` itself.

    Raises
    ------
    ValueError
        If the validated name is not one of the classification target types.
    """
    if not t_type:
        t_type = t
    allowed = (
        "binary",
        "multiclass",
        "multiclass-multioutput",
        "multilabel-indicator",
        "multilabel-sequences",
    )
    if t_type in allowed:
        return t
    raise ValueError("Unknown label type: %r" % t_type)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def check_classification_targets(y, return_value: bool = False):
    """
    Ensure that target y is of a non-regression type.

    Only the following target types (as defined in type_of_target) are allowed:
        'binary', 'multiclass', 'multiclass-multioutput',
        'multilabel-indicator', 'multilabel-sequences'

    Parameters
    ----------
    y : array-like
    return_value : bool, default=False
        When true, ``y`` is also converted to a tensor and returned after
        being passed through the same check.

    Returns
    -------
    The checked target type, or a ``(target type, checked y)`` pair when
    ``return_value`` is true.
    """
    raw_type = type_of_target(y)
    checked_type = raw_type.mf.apply_chunk(
        _check_class_target_name_and_return, dtype=raw_type.dtype
    )
    if return_value:
        y_tensor = mt.array(y)
        # the check receives the already validated type as its extra argument
        checked_y = y_tensor.mf.apply_chunk(
            _check_class_target_name_and_return,
            args=(checked_type,),
            **y_tensor.params,
        )
        return checked_type, checked_y
    return checked_type
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import itertools
|
|
16
|
+
from collections.abc import Iterable
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from ... import opcodes
|
|
22
|
+
from ...core import ExecutableTuple, OutputType, get_output_types
|
|
23
|
+
from ...core.operator import MapReduceOperator
|
|
24
|
+
from ...dataframe.utils import parse_index
|
|
25
|
+
from ...serialization.serializables import FieldTypes, Int64Field, TupleField
|
|
26
|
+
from ...tensor.utils import check_random_state, gen_random_seeds, validate_axis
|
|
27
|
+
from ...utils import tokenize
|
|
28
|
+
from ..core import LearnOperatorMixin
|
|
29
|
+
from . import convert_to_tensor_or_dataframe
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _shuffle_index_value(op, index_value, chunk_index=None):
    """
    Build index metadata for a shuffled axis.

    The result wraps an empty pandas index that keeps the dtype of
    ``index_value`` and carries a key derived from the operator's values,
    the chunk index and the key of ``index_value``.
    """
    token = tokenize((op._values_, chunk_index, index_value.key))
    empty_index = pd.Index([], index_value.to_pandas().dtype)
    return parse_index(empty_index, key=token)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LearnShuffle(MapReduceOperator, LearnOperatorMixin):
    """Operator that shuffles several arrays along the same axes."""

    _op_type_ = opcodes.PERMUTATION

    # axes to shuffle along
    axes = TupleField("axes", FieldTypes.int32)
    # random seeds, stored in the same order as ``axes``
    seeds = TupleField("seeds", FieldTypes.uint32)
    n_samples = Int64Field("n_samples", default=None)

    reduce_sizes = TupleField("reduce_sizes", FieldTypes.uint32)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @property
    def output_limit(self):
        """Number of outputs: one per output type before staging, else 1."""
        if self.stage is None:
            return len(self.output_types)
        return 1

    def _shuffle_index_value(self, index_value):
        """Return index metadata for an index that has been shuffled."""
        return _shuffle_index_value(self, index_value)

    def _shuffle_dtypes(self, dtypes):
        """
        Permute ``dtypes`` with the seed that belongs to axis 1.

        The permutation is applied positionally via ``.iloc``. Plain ``[]``
        indexing with integers is label-based whenever the column labels are
        integers, which fails or picks the wrong columns unless the labels
        happen to equal their positions.
        """
        seed = self.seeds[self.axes.index(1)]
        rs = np.random.RandomState(seed)
        shuffled_dtypes = dtypes.iloc[rs.permutation(np.arange(len(dtypes)))]
        return shuffled_dtypes

    def _calc_params(self, params):
        """
        Update the params of every output in place and return them.

        DataFrame outputs get new index metadata when axis 0 is shuffled and
        permuted dtypes/columns when axis 1 is shuffled. Series outputs get
        new index metadata when axis 0 is shuffled. Every output records its
        position in ``_position_``.
        """
        axes = set(self.axes)
        for i, output_type, param in zip(itertools.count(0), self.output_types, params):
            if output_type == OutputType.dataframe:
                if 0 in axes:
                    param["index_value"] = self._shuffle_index_value(
                        param["index_value"]
                    )
                if 1 in axes:
                    dtypes = param["dtypes"] = self._shuffle_dtypes(param["dtypes"])
                    param["columns_value"] = parse_index(dtypes.index, store_data=True)
            elif output_type == OutputType.series:
                if 0 in axes:
                    param["index_value"] = self._shuffle_index_value(
                        param["index_value"]
                    )
            param["_position_"] = i
        return params

    def __call__(self, arrays):
        """Create one shuffled output tileable per input array."""
        params = self._calc_params([ar.params for ar in arrays])
        return self.new_tileables(arrays, kws=params)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def shuffle(*arrays, random_state=None, n_samples=None, axes=None):
    """
    Shuffle arrays along the given axes in a consistent way.

    Parameters
    ----------
    *arrays : array-likes
        Objects to shuffle; each is converted to a tensor or DataFrame first.
        Arrays must agree in size on every shuffled axis they possess.
    random_state : optional
        Passed to ``check_random_state`` to derive one seed per axis.
    n_samples : optional
        Not supported; any truthy value raises ``TypeError``.
    axes : int or iterable of int, optional
        Axes to shuffle along. Defaults to ``(0,)`` when omitted or empty.

    Returns
    -------
    The shuffled object when a single array is given, otherwise an
    ``ExecutableTuple`` holding one shuffled object per input.

    Raises
    ------
    TypeError
        If ``n_samples`` is given.
    ValueError
        If no array is passed, or arrays differ in size on a shuffled axis.
    """
    arrays = [convert_to_tensor_or_dataframe(ar) for ar in arrays]
    if n_samples:
        raise TypeError("n_samples argument of shuffle() not supported.")
    if not arrays:
        # fail with a clear message instead of max() on an empty sequence
        raise ValueError("shuffle() requires at least one array")

    # normalize axes without relying on truthiness, which is ambiguous for
    # array-valued input
    if axes is None:
        axes = (0,)
    elif not isinstance(axes, Iterable):
        axes = (axes,)
    else:
        axes = tuple(axes) or (0,)
    random_state = check_random_state(random_state).to_numpy()

    max_ndim = max(ar.ndim for ar in arrays)
    # de-duplicate and sort the normalized axes
    axes = tuple(np.unique([validate_axis(max_ndim, ax) for ax in axes]).tolist())
    seeds = gen_random_seeds(len(axes), random_state)

    # verify shape
    for ax in axes:
        shapes = {ar.shape[ax] for ar in arrays if ax < ar.ndim}
        if len(shapes) > 1:
            raise ValueError(f"arrays do not have same shape on axis {ax}")

    op = LearnShuffle(axes=axes, seeds=seeds, output_types=get_output_types(*arrays))
    shuffled_arrays = op(arrays)
    if len(arrays) == 1:
        return shuffled_arrays[0]
    else:
        return ExecutableTuple(shuffled_arrays)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List, Optional
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ... import tensor as mt
|
|
21
|
+
from ...core import OutputType
|
|
22
|
+
from ...core.operator import Operator, TileableOperatorMixin
|
|
23
|
+
from ...serialization.serializables import Int16Field, ReferenceField
|
|
24
|
+
from ...typing_ import EntityType
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LearnCountNonzero(Operator, TileableOperatorMixin):
    """Operator behind ``count_nonzero`` with optional per-row weights."""

    _op_module_ = "learn"
    _op_type_ = opcodes.COUNT_NONZERO

    # axis of aggregation; None yields a scalar output
    axis = Int16Field("axis")
    # optional weights; registered as the last input when present
    sample_weight = ReferenceField("sample_weight")

    @classmethod
    def _set_inputs(cls, op: "LearnCountNonzero", inputs: List[EntityType]):
        """Re-point ``op.sample_weight`` at the last input when weights are set."""
        super()._set_inputs(op, inputs)
        if op.sample_weight is None:
            return
        op.sample_weight = inputs[-1]

    def __call__(self, x, sample_weight=None):
        """
        Create the output tileable for ``x`` and optional ``sample_weight``.

        The output dtype is the weights' dtype when weights are given and the
        default integer dtype otherwise. Without an axis the output is a
        scalar; with one, its length is ``x.shape[1 - axis]``.
        """
        self.sample_weight = sample_weight
        self._output_types = [
            OutputType.scalar if self.axis is None else OutputType.tensor
        ]
        if sample_weight is None:
            inputs, dtype = [x], np.dtype(int)
        else:
            inputs, dtype = [x, sample_weight], sample_weight.dtype

        shape = () if self.axis is None else (x.shape[1 - self.axis],)

        return self.new_tileable(inputs, shape=shape, dtype=dtype)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def count_nonzero(X, axis: Optional[int] = None, sample_weight=None):
    """A variant of X.getnnz() with extension to weighting on axis 0

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_labels)
        Input data; converted with ``mt.asarray``.

    axis : None, 0 or 1
        The axis on which the data is aggregated. ``-1`` and ``-2`` are
        accepted as aliases of ``1`` and ``0``.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Raises
    ------
    ValueError
        If ``axis`` is neither None nor one of the supported values.
    """
    if axis is not None:
        # map the negative aliases onto their non-negative counterparts
        if axis == -1:
            axis = 1
        elif axis == -2:
            axis = 0
        if axis not in (0, 1):
            raise ValueError(f"Unsupported axis: {axis}")

    X = mt.asarray(X)
    weights = None if sample_weight is None else mt.asarray(sample_weight)

    return LearnCountNonzero(axis=axis)(X, sample_weight=weights)
|