PyPI - maxframe - Versions diffs - 1.3.0__cp311-cp311-macosx_10_9_universal2.whl → 2.0.0b1__cp311-cp311-macosx_10_9_universal2.whl - Mend

maxframe 1.3.0__cp311-cp311-macosx_10_9_universal2.whl → 2.0.0b1__cp311-cp311-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (643) hide show

maxframe/_utils.cpython-311-darwin.so +0 -0
maxframe/_utils.pyi +21 -0
maxframe/_utils.pyx +4 -3
maxframe/codegen/__init__.py +27 -0
maxframe/{codegen.py → codegen/core.py} +49 -43
maxframe/codegen/spe/__init__.py +16 -0
maxframe/codegen/spe/core.py +307 -0
maxframe/codegen/spe/dataframe/__init__.py +37 -0
maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
maxframe/codegen/spe/dataframe/datasource.py +181 -0
maxframe/codegen/spe/dataframe/datastore.py +204 -0
maxframe/codegen/spe/dataframe/extensions.py +63 -0
maxframe/codegen/spe/dataframe/fetch.py +26 -0
maxframe/codegen/spe/dataframe/groupby.py +224 -0
maxframe/codegen/spe/dataframe/indexing.py +238 -0
maxframe/codegen/spe/dataframe/merge.py +73 -0
maxframe/codegen/spe/dataframe/misc.py +286 -0
maxframe/codegen/spe/dataframe/missing.py +64 -0
maxframe/codegen/spe/dataframe/reduction.py +160 -0
maxframe/codegen/spe/dataframe/sort.py +83 -0
maxframe/codegen/spe/dataframe/statistics.py +46 -0
maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
maxframe/codegen/spe/dataframe/tseries.py +46 -0
maxframe/codegen/spe/dataframe/udf.py +62 -0
maxframe/codegen/spe/dataframe/value_counts.py +31 -0
maxframe/codegen/spe/dataframe/window.py +65 -0
maxframe/codegen/spe/learn/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
maxframe/codegen/spe/learn/contrib/models.py +41 -0
maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
maxframe/codegen/spe/learn/utils/__init__.py +15 -0
maxframe/codegen/spe/learn/utils/checks.py +55 -0
maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
maxframe/codegen/spe/learn/utils/validation.py +35 -0
maxframe/codegen/spe/objects.py +26 -0
maxframe/codegen/spe/remote.py +29 -0
maxframe/codegen/spe/tensor/__init__.py +28 -0
maxframe/codegen/spe/tensor/arithmetic.py +95 -0
maxframe/codegen/spe/tensor/core.py +41 -0
maxframe/codegen/spe/tensor/datasource.py +165 -0
maxframe/codegen/spe/tensor/extensions.py +35 -0
maxframe/codegen/spe/tensor/fetch.py +26 -0
maxframe/codegen/spe/tensor/indexing.py +63 -0
maxframe/codegen/spe/tensor/linalg.py +63 -0
maxframe/codegen/spe/tensor/merge.py +31 -0
maxframe/codegen/spe/tensor/misc.py +121 -0
maxframe/codegen/spe/tensor/random.py +29 -0
maxframe/codegen/spe/tensor/reduction.py +39 -0
maxframe/codegen/spe/tensor/reshape.py +26 -0
maxframe/codegen/spe/tensor/sort.py +42 -0
maxframe/codegen/spe/tensor/special.py +35 -0
maxframe/codegen/spe/tensor/statistics.py +24 -0
maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
maxframe/codegen/spe/tests/__init__.py +13 -0
maxframe/codegen/spe/tests/test_remote.py +29 -0
maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
maxframe/codegen/spe/utils.py +54 -0
maxframe/codegen/tests/__init__.py +13 -0
maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
maxframe/config/__init__.py +1 -1
maxframe/config/config.py +50 -23
maxframe/config/tests/test_config.py +4 -12
maxframe/config/validators.py +5 -0
maxframe/conftest.py +38 -10
maxframe/core/__init__.py +1 -0
maxframe/core/context.py +110 -0
maxframe/core/entity/__init__.py +1 -0
maxframe/core/entity/core.py +0 -7
maxframe/core/entity/objects.py +19 -5
maxframe/core/entity/output_types.py +11 -0
maxframe/core/entity/tests/test_objects.py +11 -12
maxframe/core/entity/tileables.py +3 -1
maxframe/core/entity/utils.py +15 -0
maxframe/core/graph/__init__.py +6 -1
maxframe/core/graph/builder/base.py +5 -1
maxframe/core/graph/core.cpython-311-darwin.so +0 -0
maxframe/core/graph/core.pyx +17 -6
maxframe/core/graph/entity.py +18 -6
maxframe/core/operator/__init__.py +8 -3
maxframe/core/operator/base.py +35 -12
maxframe/core/operator/core.py +37 -14
maxframe/core/operator/fetch.py +5 -18
maxframe/core/operator/objects.py +0 -20
maxframe/core/operator/shuffle.py +6 -72
maxframe/dataframe/__init__.py +1 -0
maxframe/dataframe/accessors/datetime_/core.py +7 -4
maxframe/dataframe/accessors/string_/core.py +9 -6
maxframe/dataframe/arithmetic/core.py +31 -20
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/dataframe/core.py +98 -91
maxframe/dataframe/datasource/core.py +8 -1
maxframe/dataframe/datasource/date_range.py +8 -0
maxframe/dataframe/datasource/from_index.py +9 -5
maxframe/dataframe/datasource/from_records.py +9 -2
maxframe/dataframe/datasource/from_tensor.py +32 -21
maxframe/dataframe/datasource/read_csv.py +8 -2
maxframe/dataframe/datasource/read_odps_query.py +33 -3
maxframe/dataframe/datasource/read_odps_table.py +20 -5
maxframe/dataframe/datasource/read_parquet.py +8 -3
maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
maxframe/dataframe/datastore/to_csv.py +7 -3
maxframe/dataframe/datastore/to_odps.py +42 -6
maxframe/dataframe/extensions/__init__.py +6 -1
maxframe/dataframe/extensions/apply_chunk.py +96 -136
maxframe/dataframe/extensions/flatjson.py +3 -2
maxframe/dataframe/extensions/flatmap.py +15 -7
maxframe/dataframe/fetch/core.py +12 -1
maxframe/dataframe/groupby/__init__.py +7 -0
maxframe/dataframe/groupby/aggregation.py +62 -9
maxframe/dataframe/groupby/apply.py +50 -74
maxframe/dataframe/groupby/apply_chunk.py +393 -0
maxframe/dataframe/groupby/core.py +80 -17
maxframe/dataframe/groupby/extensions.py +26 -0
maxframe/dataframe/groupby/fill.py +9 -4
maxframe/dataframe/groupby/sample.py +7 -7
maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
maxframe/dataframe/groupby/transform.py +57 -54
maxframe/dataframe/indexing/align.py +7 -6
maxframe/dataframe/indexing/getitem.py +9 -8
maxframe/dataframe/indexing/iloc.py +28 -23
maxframe/dataframe/indexing/insert.py +7 -3
maxframe/dataframe/indexing/loc.py +9 -8
maxframe/dataframe/indexing/reindex.py +36 -30
maxframe/dataframe/indexing/rename_axis.py +18 -10
maxframe/dataframe/indexing/reset_index.py +0 -2
maxframe/dataframe/indexing/sample.py +13 -9
maxframe/dataframe/indexing/set_axis.py +9 -6
maxframe/dataframe/indexing/setitem.py +8 -5
maxframe/dataframe/indexing/where.py +12 -9
maxframe/dataframe/merge/__init__.py +0 -1
maxframe/dataframe/merge/concat.py +10 -31
maxframe/dataframe/merge/merge.py +2 -24
maxframe/dataframe/misc/__init__.py +6 -0
maxframe/dataframe/misc/_duplicate.py +7 -3
maxframe/dataframe/misc/apply.py +106 -139
maxframe/dataframe/misc/astype.py +3 -2
maxframe/dataframe/misc/case_when.py +11 -7
maxframe/dataframe/misc/cut.py +11 -10
maxframe/dataframe/misc/describe.py +7 -3
maxframe/dataframe/misc/drop.py +13 -11
maxframe/dataframe/misc/eval.py +0 -2
maxframe/dataframe/misc/get_dummies.py +78 -49
maxframe/dataframe/misc/isin.py +13 -10
maxframe/dataframe/misc/map.py +21 -6
maxframe/dataframe/misc/melt.py +8 -1
maxframe/dataframe/misc/pivot.py +232 -0
maxframe/dataframe/misc/pivot_table.py +52 -40
maxframe/dataframe/misc/rechunk.py +59 -0
maxframe/dataframe/misc/shift.py +7 -4
maxframe/dataframe/misc/stack.py +5 -3
maxframe/dataframe/misc/tests/test_misc.py +167 -1
maxframe/dataframe/misc/transform.py +63 -65
maxframe/dataframe/misc/value_counts.py +7 -4
maxframe/dataframe/missing/dropna.py +16 -7
maxframe/dataframe/missing/fillna.py +18 -10
maxframe/dataframe/missing/replace.py +10 -6
maxframe/dataframe/missing/tests/test_missing.py +2 -2
maxframe/dataframe/operators.py +1 -27
maxframe/dataframe/reduction/aggregation.py +128 -3
maxframe/dataframe/reduction/core.py +20 -6
maxframe/dataframe/reduction/median.py +1 -1
maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
maxframe/dataframe/reduction/unique.py +53 -7
maxframe/dataframe/statistics/corr.py +9 -6
maxframe/dataframe/statistics/quantile.py +9 -6
maxframe/dataframe/tseries/to_datetime.py +6 -4
maxframe/dataframe/utils.py +219 -31
maxframe/dataframe/window/rolling.py +7 -4
maxframe/env.py +1 -0
maxframe/errors.py +9 -0
maxframe/extension.py +13 -2
maxframe/io/objects/core.py +67 -51
maxframe/io/objects/tensor.py +73 -17
maxframe/io/objects/tests/test_object_io.py +8 -55
maxframe/io/odpsio/arrow.py +15 -2
maxframe/io/odpsio/schema.py +43 -13
maxframe/io/odpsio/tableio.py +63 -11
maxframe/io/odpsio/tests/test_arrow.py +1 -2
maxframe/io/odpsio/tests/test_schema.py +114 -1
maxframe/io/odpsio/tests/test_tableio.py +42 -0
maxframe/io/odpsio/tests/test_volumeio.py +22 -48
maxframe/learn/__init__.py +2 -2
maxframe/learn/contrib/__init__.py +2 -2
maxframe/learn/contrib/graph/connected_components.py +2 -1
maxframe/learn/contrib/lightgbm/__init__.py +33 -0
maxframe/learn/contrib/lightgbm/_predict.py +138 -0
maxframe/learn/contrib/lightgbm/_train.py +163 -0
maxframe/learn/contrib/lightgbm/callback.py +114 -0
maxframe/learn/contrib/lightgbm/classifier.py +199 -0
maxframe/learn/contrib/lightgbm/core.py +372 -0
maxframe/learn/contrib/lightgbm/dataset.py +153 -0
maxframe/learn/contrib/lightgbm/regressor.py +29 -0
maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
maxframe/learn/contrib/llm/models/dashscope.py +34 -0
maxframe/learn/contrib/llm/models/managed.py +15 -0
maxframe/learn/contrib/llm/multi_modal.py +92 -0
maxframe/learn/contrib/llm/text.py +21 -5
maxframe/learn/contrib/models.py +38 -9
maxframe/learn/contrib/utils.py +55 -0
maxframe/learn/contrib/xgboost/callback.py +86 -0
maxframe/learn/contrib/xgboost/classifier.py +26 -30
maxframe/learn/contrib/xgboost/core.py +53 -42
maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
maxframe/learn/contrib/xgboost/predict.py +13 -8
maxframe/learn/contrib/xgboost/regressor.py +28 -27
maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
maxframe/learn/contrib/xgboost/train.py +59 -16
maxframe/learn/core.py +252 -0
maxframe/learn/datasets/__init__.py +20 -0
maxframe/learn/datasets/samples_generator.py +628 -0
maxframe/learn/linear_model/__init__.py +15 -0
maxframe/learn/linear_model/_base.py +163 -0
maxframe/learn/linear_model/_lin_reg.py +175 -0
maxframe/learn/metrics/__init__.py +25 -0
maxframe/learn/metrics/_check_targets.py +95 -0
maxframe/learn/metrics/_classification.py +1121 -0
maxframe/learn/metrics/_regression.py +256 -0
maxframe/learn/model_selection/__init__.py +15 -0
maxframe/learn/model_selection/_split.py +451 -0
maxframe/learn/model_selection/tests/__init__.py +13 -0
maxframe/learn/model_selection/tests/test_split.py +156 -0
maxframe/learn/preprocessing/__init__.py +16 -0
maxframe/learn/preprocessing/_data/__init__.py +17 -0
maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
maxframe/learn/preprocessing/_data/normalize.py +127 -0
maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
maxframe/learn/preprocessing/_data/utils.py +79 -0
maxframe/learn/preprocessing/_label/__init__.py +16 -0
maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
maxframe/learn/utils/__init__.py +4 -0
maxframe/learn/utils/_encode.py +314 -0
maxframe/learn/utils/checks.py +161 -0
maxframe/learn/utils/core.py +33 -0
maxframe/learn/utils/extmath.py +176 -0
maxframe/learn/utils/multiclass.py +292 -0
maxframe/learn/utils/shuffle.py +114 -0
maxframe/learn/utils/sparsefuncs.py +87 -0
maxframe/learn/utils/validation.py +775 -0
maxframe/lib/__init__.py +0 -2
maxframe/lib/compat.py +145 -0
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
maxframe/lib/sparse/__init__.py +10 -15
maxframe/lib/sparse/array.py +45 -33
maxframe/lib/sparse/core.py +0 -2
maxframe/lib/sparse/linalg.py +31 -0
maxframe/lib/sparse/matrix.py +5 -2
maxframe/lib/sparse/tests/__init__.py +0 -2
maxframe/lib/sparse/tests/test_sparse.py +53 -53
maxframe/lib/sparse/vector.py +0 -2
maxframe/mixin.py +59 -2
maxframe/opcodes.py +13 -5
maxframe/protocol.py +67 -14
maxframe/remote/core.py +16 -14
maxframe/remote/run_script.py +6 -3
maxframe/serialization/__init__.py +2 -0
maxframe/serialization/core.cpython-311-darwin.so +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -1
maxframe/serialization/core.pyx +82 -4
maxframe/serialization/pandas.py +5 -1
maxframe/serialization/serializables/core.py +6 -5
maxframe/serialization/serializables/field.py +2 -2
maxframe/serialization/serializables/tests/test_field_type.py +3 -5
maxframe/serialization/tests/test_serial.py +27 -0
maxframe/session.py +4 -71
maxframe/sperunner.py +165 -0
maxframe/tensor/__init__.py +35 -2
maxframe/tensor/arithmetic/__init__.py +2 -4
maxframe/tensor/arithmetic/abs.py +0 -2
maxframe/tensor/arithmetic/absolute.py +0 -2
maxframe/tensor/arithmetic/add.py +34 -4
maxframe/tensor/arithmetic/angle.py +0 -2
maxframe/tensor/arithmetic/arccos.py +1 -4
maxframe/tensor/arithmetic/arccosh.py +1 -3
maxframe/tensor/arithmetic/arcsin.py +0 -2
maxframe/tensor/arithmetic/arcsinh.py +0 -2
maxframe/tensor/arithmetic/arctan.py +0 -2
maxframe/tensor/arithmetic/arctan2.py +0 -2
maxframe/tensor/arithmetic/arctanh.py +0 -2
maxframe/tensor/arithmetic/around.py +0 -2
maxframe/tensor/arithmetic/bitand.py +0 -2
maxframe/tensor/arithmetic/bitor.py +1 -3
maxframe/tensor/arithmetic/bitxor.py +1 -3
maxframe/tensor/arithmetic/cbrt.py +0 -2
maxframe/tensor/arithmetic/ceil.py +0 -2
maxframe/tensor/arithmetic/clip.py +13 -13
maxframe/tensor/arithmetic/conj.py +0 -2
maxframe/tensor/arithmetic/copysign.py +0 -2
maxframe/tensor/arithmetic/core.py +47 -39
maxframe/tensor/arithmetic/cos.py +1 -3
maxframe/tensor/arithmetic/cosh.py +0 -2
maxframe/tensor/arithmetic/deg2rad.py +0 -2
maxframe/tensor/arithmetic/degrees.py +0 -2
maxframe/tensor/arithmetic/divide.py +0 -2
maxframe/tensor/arithmetic/equal.py +0 -2
maxframe/tensor/arithmetic/exp.py +1 -3
maxframe/tensor/arithmetic/exp2.py +0 -2
maxframe/tensor/arithmetic/expm1.py +0 -2
maxframe/tensor/arithmetic/fabs.py +0 -2
maxframe/tensor/arithmetic/fix.py +0 -2
maxframe/tensor/arithmetic/float_power.py +0 -2
maxframe/tensor/arithmetic/floor.py +0 -2
maxframe/tensor/arithmetic/floordiv.py +0 -2
maxframe/tensor/arithmetic/fmax.py +0 -2
maxframe/tensor/arithmetic/fmin.py +0 -2
maxframe/tensor/arithmetic/fmod.py +0 -2
maxframe/tensor/arithmetic/frexp.py +6 -2
maxframe/tensor/arithmetic/greater.py +0 -2
maxframe/tensor/arithmetic/greater_equal.py +0 -2
maxframe/tensor/arithmetic/hypot.py +0 -2
maxframe/tensor/arithmetic/i0.py +1 -3
maxframe/tensor/arithmetic/imag.py +0 -2
maxframe/tensor/arithmetic/invert.py +1 -3
maxframe/tensor/arithmetic/isclose.py +0 -2
maxframe/tensor/arithmetic/iscomplex.py +0 -2
maxframe/tensor/arithmetic/isfinite.py +1 -3
maxframe/tensor/arithmetic/isinf.py +0 -2
maxframe/tensor/arithmetic/isnan.py +0 -2
maxframe/tensor/arithmetic/isreal.py +0 -2
maxframe/tensor/arithmetic/ldexp.py +0 -2
maxframe/tensor/arithmetic/less.py +0 -2
maxframe/tensor/arithmetic/less_equal.py +0 -2
maxframe/tensor/arithmetic/log.py +1 -3
maxframe/tensor/arithmetic/log10.py +1 -3
maxframe/tensor/arithmetic/log1p.py +1 -3
maxframe/tensor/arithmetic/log2.py +1 -3
maxframe/tensor/arithmetic/logaddexp.py +0 -2
maxframe/tensor/arithmetic/logaddexp2.py +0 -2
maxframe/tensor/arithmetic/logical_and.py +0 -2
maxframe/tensor/arithmetic/logical_not.py +1 -3
maxframe/tensor/arithmetic/logical_or.py +0 -2
maxframe/tensor/arithmetic/logical_xor.py +0 -2
maxframe/tensor/arithmetic/lshift.py +0 -2
maxframe/tensor/arithmetic/maximum.py +0 -2
maxframe/tensor/arithmetic/minimum.py +0 -2
maxframe/tensor/arithmetic/mod.py +0 -2
maxframe/tensor/arithmetic/modf.py +6 -2
maxframe/tensor/arithmetic/multiply.py +37 -4
maxframe/tensor/arithmetic/nan_to_num.py +0 -2
maxframe/tensor/arithmetic/negative.py +0 -2
maxframe/tensor/arithmetic/nextafter.py +0 -2
maxframe/tensor/arithmetic/not_equal.py +0 -2
maxframe/tensor/arithmetic/positive.py +0 -2
maxframe/tensor/arithmetic/power.py +0 -2
maxframe/tensor/arithmetic/rad2deg.py +0 -2
maxframe/tensor/arithmetic/radians.py +0 -2
maxframe/tensor/arithmetic/real.py +0 -2
maxframe/tensor/arithmetic/reciprocal.py +5 -3
maxframe/tensor/arithmetic/rint.py +1 -3
maxframe/tensor/arithmetic/rshift.py +0 -2
maxframe/tensor/arithmetic/setimag.py +0 -2
maxframe/tensor/arithmetic/setreal.py +0 -2
maxframe/tensor/arithmetic/sign.py +0 -2
maxframe/tensor/arithmetic/signbit.py +0 -2
maxframe/tensor/arithmetic/sin.py +0 -2
maxframe/tensor/arithmetic/sinc.py +1 -3
maxframe/tensor/arithmetic/sinh.py +0 -2
maxframe/tensor/arithmetic/spacing.py +0 -2
maxframe/tensor/arithmetic/sqrt.py +0 -2
maxframe/tensor/arithmetic/square.py +0 -2
maxframe/tensor/arithmetic/subtract.py +4 -2
maxframe/tensor/arithmetic/tan.py +0 -2
maxframe/tensor/arithmetic/tanh.py +0 -2
maxframe/tensor/arithmetic/tests/__init__.py +0 -2
maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
maxframe/tensor/arithmetic/truediv.py +0 -2
maxframe/tensor/arithmetic/trunc.py +0 -2
maxframe/tensor/arithmetic/utils.py +32 -6
maxframe/tensor/array_utils.py +3 -25
maxframe/tensor/core.py +6 -6
maxframe/tensor/datasource/__init__.py +10 -2
maxframe/tensor/datasource/arange.py +0 -2
maxframe/tensor/datasource/array.py +3 -22
maxframe/tensor/datasource/core.py +15 -10
maxframe/tensor/datasource/diag.py +140 -0
maxframe/tensor/datasource/diagflat.py +69 -0
maxframe/tensor/datasource/empty.py +0 -2
maxframe/tensor/datasource/eye.py +95 -0
maxframe/tensor/datasource/from_dataframe.py +0 -2
maxframe/tensor/datasource/from_dense.py +0 -17
maxframe/tensor/datasource/from_sparse.py +0 -2
maxframe/tensor/datasource/full.py +0 -2
maxframe/tensor/datasource/identity.py +54 -0
maxframe/tensor/datasource/indices.py +115 -0
maxframe/tensor/datasource/linspace.py +140 -0
maxframe/tensor/datasource/meshgrid.py +135 -0
maxframe/tensor/datasource/ones.py +8 -3
maxframe/tensor/datasource/tests/test_datasource.py +32 -1
maxframe/tensor/datasource/tri_array.py +107 -0
maxframe/tensor/datasource/zeros.py +7 -3
maxframe/tensor/extensions/__init__.py +31 -0
maxframe/tensor/extensions/accessor.py +25 -0
maxframe/tensor/extensions/apply_chunk.py +137 -0
maxframe/tensor/indexing/__init__.py +1 -1
maxframe/tensor/indexing/choose.py +8 -6
maxframe/tensor/indexing/compress.py +0 -2
maxframe/tensor/indexing/extract.py +0 -2
maxframe/tensor/indexing/fill_diagonal.py +9 -6
maxframe/tensor/indexing/flatnonzero.py +1 -3
maxframe/tensor/indexing/getitem.py +10 -43
maxframe/tensor/indexing/nonzero.py +2 -4
maxframe/tensor/indexing/setitem.py +19 -9
maxframe/tensor/indexing/slice.py +6 -3
maxframe/tensor/indexing/take.py +0 -2
maxframe/tensor/indexing/tests/__init__.py +0 -2
maxframe/tensor/indexing/tests/test_indexing.py +0 -2
maxframe/tensor/indexing/unravel_index.py +6 -6
maxframe/tensor/lib/__init__.py +16 -0
maxframe/tensor/lib/index_tricks.py +404 -0
maxframe/tensor/linalg/__init__.py +36 -0
maxframe/tensor/linalg/dot.py +145 -0
maxframe/tensor/linalg/inner.py +36 -0
maxframe/tensor/linalg/inv.py +83 -0
maxframe/tensor/linalg/lu.py +115 -0
maxframe/tensor/linalg/matmul.py +225 -0
maxframe/tensor/linalg/qr.py +124 -0
maxframe/tensor/linalg/solve_triangular.py +103 -0
maxframe/tensor/linalg/svd.py +167 -0
maxframe/tensor/linalg/tensordot.py +213 -0
maxframe/tensor/linalg/vdot.py +73 -0
maxframe/tensor/merge/__init__.py +4 -0
maxframe/tensor/merge/append.py +74 -0
maxframe/tensor/merge/column_stack.py +63 -0
maxframe/tensor/merge/concatenate.py +3 -2
maxframe/tensor/merge/dstack.py +71 -0
maxframe/tensor/merge/hstack.py +70 -0
maxframe/tensor/merge/stack.py +0 -2
maxframe/tensor/merge/tests/test_merge.py +0 -2
maxframe/tensor/misc/__init__.py +18 -5
maxframe/tensor/misc/astype.py +10 -8
maxframe/tensor/misc/broadcast_to.py +1 -1
maxframe/tensor/misc/copy.py +64 -0
maxframe/tensor/misc/diff.py +115 -0
maxframe/tensor/misc/flatten.py +63 -0
maxframe/tensor/misc/in1d.py +94 -0
maxframe/tensor/misc/isin.py +130 -0
maxframe/tensor/misc/ndim.py +53 -0
maxframe/tensor/misc/ravel.py +0 -2
maxframe/tensor/misc/repeat.py +129 -0
maxframe/tensor/misc/searchsorted.py +147 -0
maxframe/tensor/misc/setdiff1d.py +58 -0
maxframe/tensor/misc/squeeze.py +117 -0
maxframe/tensor/misc/swapaxes.py +113 -0
maxframe/tensor/misc/tests/test_misc.py +0 -2
maxframe/tensor/misc/transpose.py +8 -4
maxframe/tensor/misc/trapezoid.py +123 -0
maxframe/tensor/misc/unique.py +0 -1
maxframe/tensor/misc/where.py +10 -8
maxframe/tensor/operators.py +0 -34
maxframe/tensor/random/__init__.py +3 -5
maxframe/tensor/random/binomial.py +0 -2
maxframe/tensor/random/bytes.py +0 -2
maxframe/tensor/random/chisquare.py +0 -2
maxframe/tensor/random/choice.py +9 -8
maxframe/tensor/random/core.py +20 -5
maxframe/tensor/random/dirichlet.py +0 -2
maxframe/tensor/random/exponential.py +0 -2
maxframe/tensor/random/f.py +2 -4
maxframe/tensor/random/gamma.py +0 -2
maxframe/tensor/random/geometric.py +0 -2
maxframe/tensor/random/gumbel.py +0 -2
maxframe/tensor/random/hypergeometric.py +0 -2
maxframe/tensor/random/laplace.py +2 -4
maxframe/tensor/random/logistic.py +0 -2
maxframe/tensor/random/lognormal.py +0 -2
maxframe/tensor/random/logseries.py +0 -2
maxframe/tensor/random/multinomial.py +0 -2
maxframe/tensor/random/multivariate_normal.py +0 -2
maxframe/tensor/random/negative_binomial.py +0 -2
maxframe/tensor/random/noncentral_chisquare.py +0 -2
maxframe/tensor/random/noncentral_f.py +1 -3
maxframe/tensor/random/normal.py +0 -2
maxframe/tensor/random/pareto.py +0 -2
maxframe/tensor/random/permutation.py +6 -3
maxframe/tensor/random/poisson.py +0 -2
maxframe/tensor/random/power.py +0 -2
maxframe/tensor/random/rand.py +0 -2
maxframe/tensor/random/randint.py +0 -2
maxframe/tensor/random/randn.py +0 -2
maxframe/tensor/random/random_integers.py +0 -2
maxframe/tensor/random/random_sample.py +0 -2
maxframe/tensor/random/rayleigh.py +0 -2
maxframe/tensor/random/standard_cauchy.py +0 -2
maxframe/tensor/random/standard_exponential.py +0 -2
maxframe/tensor/random/standard_gamma.py +0 -2
maxframe/tensor/random/standard_normal.py +0 -2
maxframe/tensor/random/standard_t.py +0 -2
maxframe/tensor/random/tests/__init__.py +0 -2
maxframe/tensor/random/tests/test_random.py +0 -2
maxframe/tensor/random/triangular.py +0 -2
maxframe/tensor/random/uniform.py +0 -2
maxframe/tensor/random/vonmises.py +0 -2
maxframe/tensor/random/wald.py +0 -2
maxframe/tensor/random/weibull.py +0 -2
maxframe/tensor/random/zipf.py +0 -2
maxframe/tensor/reduction/__init__.py +0 -2
maxframe/tensor/reduction/all.py +0 -2
maxframe/tensor/reduction/allclose.py +0 -2
maxframe/tensor/reduction/any.py +0 -2
maxframe/tensor/reduction/argmax.py +1 -3
maxframe/tensor/reduction/argmin.py +1 -3
maxframe/tensor/reduction/array_equal.py +0 -2
maxframe/tensor/reduction/core.py +0 -2
maxframe/tensor/reduction/count_nonzero.py +0 -2
maxframe/tensor/reduction/cumprod.py +0 -2
maxframe/tensor/reduction/cumsum.py +0 -2
maxframe/tensor/reduction/max.py +0 -2
maxframe/tensor/reduction/mean.py +0 -2
maxframe/tensor/reduction/min.py +0 -2
maxframe/tensor/reduction/nanargmax.py +0 -2
maxframe/tensor/reduction/nanargmin.py +0 -2
maxframe/tensor/reduction/nancumprod.py +0 -2
maxframe/tensor/reduction/nancumsum.py +0 -2
maxframe/tensor/reduction/nanmax.py +0 -2
maxframe/tensor/reduction/nanmean.py +0 -2
maxframe/tensor/reduction/nanmin.py +0 -2
maxframe/tensor/reduction/nanprod.py +0 -2
maxframe/tensor/reduction/nanstd.py +0 -2
maxframe/tensor/reduction/nansum.py +0 -2
maxframe/tensor/reduction/nanvar.py +0 -2
maxframe/tensor/reduction/prod.py +0 -2
maxframe/tensor/reduction/std.py +0 -2
maxframe/tensor/reduction/sum.py +0 -2
maxframe/tensor/reduction/tests/test_reduction.py +1 -4
maxframe/tensor/reduction/var.py +0 -2
maxframe/tensor/reshape/__init__.py +0 -2
maxframe/tensor/reshape/reshape.py +6 -5
maxframe/tensor/reshape/tests/__init__.py +0 -2
maxframe/tensor/reshape/tests/test_reshape.py +0 -2
maxframe/tensor/sort/__init__.py +16 -0
maxframe/tensor/sort/argsort.py +150 -0
maxframe/tensor/sort/sort.py +295 -0
maxframe/tensor/special/__init__.py +37 -0
maxframe/tensor/special/core.py +38 -0
maxframe/tensor/special/misc.py +142 -0
maxframe/tensor/special/statistical.py +56 -0
maxframe/tensor/statistics/__init__.py +5 -0
maxframe/tensor/statistics/average.py +143 -0
maxframe/tensor/statistics/bincount.py +133 -0
maxframe/tensor/statistics/quantile.py +10 -8
maxframe/tensor/ufunc/__init__.py +0 -2
maxframe/tensor/ufunc/ufunc.py +0 -2
maxframe/tensor/utils.py +21 -3
maxframe/tests/test_protocol.py +3 -3
maxframe/tests/test_utils.py +210 -1
maxframe/tests/utils.py +67 -1
maxframe/udf.py +76 -6
maxframe/utils.py +418 -17
{maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
maxframe-2.0.0b1.dist-info/RECORD +939 -0
{maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
maxframe_client/clients/framedriver.py +19 -3
maxframe_client/fetcher.py +113 -6
maxframe_client/session/odps.py +173 -38
maxframe_client/session/task.py +3 -1
maxframe_client/tests/test_session.py +41 -5
maxframe-1.3.0.dist-info/RECORD +0 -705
{maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0

maxframe/learn/preprocessing/_label/_label_encoder.py ADDED Viewed

@@ -0,0 +1,174 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from .... import tensor as mt
+from ....udf import builtin_function
+from ...core import BaseEstimator, TransformerMixin
+from ...utils._encode import _encode, _unique
+from ...utils.validation import _num_samples, column_or_1d
+try:
+    from sklearn.utils.validation import check_is_fitted
+except ImportError:
+    check_is_fitted = lambda *_: None
+class LabelEncoder(TransformerMixin, BaseEstimator):
+    """Map target labels onto the integers ``0 .. n_classes - 1``.
+    The transformer is meant for target values, *i.e.* `y`, rather than
+    for the input `X`.
+    Read more in the :ref:`User Guide <preprocessing_targets>`.
+    Attributes
+    ----------
+    classes_ : ndarray of shape (n_classes,)
+        The label belonging to each class.
+    See Also
+    --------
+    OrdinalEncoder : Encode categorical features using an ordinal encoding
+        scheme.
+    OneHotEncoder : Encode categorical features as a one-hot numeric array.
+    Examples
+    --------
+    `LabelEncoder` can be used to normalize labels.
+    >>> from maxframe.learn import preprocessing
+    >>> le = preprocessing.LabelEncoder()
+    >>> le.fit([1, 2, 2, 6]).execute()
+    LabelEncoder()
+    >>> le.classes_.to_numpy()
+    array([1, 2, 6])
+    >>> le.transform([1, 1, 2, 6]).to_numpy()
+    array([0, 0, 1, 2]...)
+    >>> le.inverse_transform([0, 0, 1, 2]).to_numpy()
+    array([1, 1, 2, 6])
+    Non-numerical labels work as well, provided they are hashable and
+    comparable.
+    >>> le = preprocessing.LabelEncoder()
+    >>> le.fit(["paris", "paris", "tokyo", "amsterdam"]).execute()
+    LabelEncoder()
+    >>> list(le.classes_.to_numpy())
+    ['amsterdam', 'paris', 'tokyo']
+    >>> le.transform(["tokyo", "tokyo", "paris"]).to_numpy()
+    array([2, 2, 1]...)
+    >>> list(le.inverse_transform([2, 2, 1]).to_numpy())
+    ['tokyo', 'tokyo', 'paris']
+    """
+    def fit(self, y, execute=False, session=None, run_kwargs=None):
+        """Learn the classes present in ``y``.
+        Parameters
+        ----------
+        y : array-like of shape (n_samples,)
+            Target values.
+        execute : bool, default=False
+            When true, ``self.execute`` is called before returning.
+        session : optional
+            Passed to ``execute`` as ``session``.
+        run_kwargs : dict, optional
+            Extra keyword arguments forwarded to ``execute``.
+        Returns
+        -------
+        self : returns an instance of self.
+            Fitted label encoder.
+        """
+        labels = column_or_1d(y, warn=True)
+        self.classes_ = _unique(labels)
+        if not execute:
+            return self
+        extra_kwargs = run_kwargs or {}
+        self.execute(session=session, **extra_kwargs)
+        return self
+    def fit_transform(self, y, execute=False, session=None, run_kwargs=None):
+        """Learn the classes present in ``y`` and return ``y`` encoded.
+        Parameters
+        ----------
+        y : array-like of shape (n_samples,)
+            Target values.
+        execute : bool, default=False
+            When true, ``self.execute`` is called with the encoded labels
+            passed as ``extra_tileables``.
+        session : optional
+            Passed to ``execute`` as ``session``.
+        run_kwargs : dict, optional
+            Extra keyword arguments forwarded to ``execute``.
+        Returns
+        -------
+        y : array-like of shape (n_samples,)
+            Encoded labels.
+        """
+        labels = column_or_1d(y, warn=True)
+        classes, encoded = _unique(labels, return_inverse=True)
+        self.classes_ = classes
+        if execute:
+            extra_kwargs = run_kwargs or {}
+            self.execute(session=session, extra_tileables=encoded, **extra_kwargs)
+        return encoded
+    def transform(self, y, execute=False, session=None, run_kwargs=None):
+        """Encode ``y`` using the classes learned during fitting.
+        Parameters
+        ----------
+        y : array-like of shape (n_samples,)
+            Target values.
+        execute : bool, default=False
+            When true, the result of calling ``execute`` on the encoded
+            tensor is returned instead of the tensor itself.
+        session : optional
+            Passed to ``execute`` as ``session``.
+        run_kwargs : dict, optional
+            Extra keyword arguments forwarded to ``execute``.
+        Returns
+        -------
+        y : array-like of shape (n_samples,)
+            Labels as normalized encodings.
+        """
+        check_is_fitted(self)
+        labels = column_or_1d(y, warn=True)
+        # an empty input maps to an empty output, whatever ``execute`` is
+        if _num_samples(labels) == 0:
+            return mt.array([])
+        encoded = _encode(labels, uniques=self.classes_)
+        if not execute:
+            return encoded
+        return encoded.execute(session=session, **(run_kwargs or {}))
+    @staticmethod
+    @builtin_function
+    def _class_checker(chunk_data, classes_data):
+        """Look up ``chunk_data`` codes in ``classes_data``.
+        Raises ``ValueError`` when a code lies outside
+        ``0 .. len(classes_data) - 1``.
+        """
+        valid_codes = np.arange(len(classes_data))
+        unseen = np.setdiff1d(chunk_data, valid_codes)
+        if len(unseen) > 0:
+            raise ValueError("y contains previously unseen labels: %s" % str(unseen))
+        return classes_data[chunk_data]
+    def inverse_transform(self, y, execute=False, session=None, run_kwargs=None):
+        """Map encoded labels back to the original labels.
+        Parameters
+        ----------
+        y : ndarray of shape (n_samples,)
+            Target values.
+        execute : bool, default=False
+            When true, ``execute`` is called on the result before it is
+            returned.
+        session : optional
+            Passed to ``execute`` as ``session``.
+        run_kwargs : dict, optional
+            Extra keyword arguments forwarded to ``execute``.
+        Returns
+        -------
+        y : ndarray of shape (n_samples,)
+            Original encoding.
+        """
+        check_is_fitted(self)
+        codes = column_or_1d(y, warn=True)
+        # an empty input maps to an empty output, whatever ``execute`` is
+        if _num_samples(codes) == 0:
+            return mt.array([])
+        labels = mt.asarray(codes).mf.apply_chunk(
+            self._class_checker,
+            args=(self.classes_,),
+            dtype=self.classes_.dtype,
+        )
+        if execute:
+            extra_kwargs = run_kwargs or {}
+            labels.execute(session=session, **extra_kwargs)
+        return labels
+    def _more_tags(self):
+        """Return the estimator tags: inputs are 1-D labels."""
+        tags = {"X_types": ["1dlabels"]}
+        return tags

maxframe/learn/utils/__init__.py CHANGED Viewed

@@ -13,3 +13,7 @@
 # limitations under the License.
 from .core import convert_to_tensor_or_dataframe
+from .multiclass import check_classification_targets
+from .shuffle import shuffle
+from .sparsefuncs import count_nonzero
+from .validation import check_consistent_length

maxframe/learn/utils/_encode.py ADDED Viewed

@@ -0,0 +1,314 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import NamedTuple
+import numpy as np
+from maxframe import tensor as mt
+from ...udf import builtin_function
+from .core import is_scalar_nan
+@builtin_function
+def _cut_before(c, idx):
+    """Return the leading part of ``c`` up to and including position ``idx``."""
+    stop = idx + 1
+    return c[:stop]
+@builtin_function
+def _map_inverse_encodes(c, idx):
+    """Clamp every entry of ``c`` greater than ``idx`` down to ``idx``.
+    A writeable ``c`` is modified in place and returned; a read-only ``c``
+    is left untouched and a new array is returned instead.
+    """
+    if not c.flags.writeable:  # pragma: no cover
+        # If c is got from the shared memory, it is immutable.
+        return np.select([c <= idx], [c], idx)
+    c[c > idx] = idx
+    return c
+def _unique(values, *, return_inverse=False):
+    """Find the unique entries of ``values`` via ``mt.unique``.
+    For object dtype the result of ``mt.unique`` is returned unchanged.
+    For every other dtype the uniques are additionally cut after the
+    position where NaN sorts, so that repeated NaNs at the end collapse
+    into one, and the inverse indices are clamped to that position.
+    Parameters
+    ----------
+    values : tensor
+        Values whose unique entries are wanted.
+    return_inverse : bool, default=False
+        If True, also return the indices of the unique values.
+    Returns
+    -------
+    unique : tensor
+        The unique values.
+    unique_inverse : tensor
+        The indices to reconstruct the original array from the unique array.
+        Only provided if `return_inverse` is True.
+    """
+    is_object = values.dtype == object
+    out = mt.unique(values, return_inverse=return_inverse)
+    if is_object:
+        # FIXME workaround for LabelEncoder
+        #  drop this early return when Series.unique is implemented; the
+        #  intended replacement is
+        # series_unique = md.Series(md.Series(values).unique()).sort_values().values
+        # if return_inverse:
+        #     return series_unique, _map_to_integer(values, series_unique)
+        # else:
+        #     return series_unique
+        return out
+    uniques, inverse = out if return_inverse else (out, None)
+    # np.unique will have duplicate missing values at the end of `uniques`
+    # here we clip the nans and remove it from uniques
+    # one split per axis covering the full extent (presumably a single chunk)
+    whole_extent = tuple((extent,) for extent in uniques.shape)
+    uniques = uniques.rechunk(whole_extent)
+    nan_idx = mt.searchsorted(uniques, mt.nan)
+    uniques = uniques.mf.apply_chunk(
+        _cut_before,
+        args=(nan_idx,),
+        dtype=uniques.dtype,
+        shape=(np.nan,) * uniques.ndim,
+    )
+    if not return_inverse:
+        return uniques
+    # NOTE(review): `shape` is a tuple of 1-tuples here but a flat tuple for
+    # `uniques` above -- confirm which form apply_chunk expects.
+    inverse = inverse.mf.apply_chunk(
+        _map_inverse_encodes,
+        args=(nan_idx,),
+        dtype=inverse.dtype,
+        shape=((np.nan,),) * inverse.ndim,
+    )
+    return uniques, inverse
+class _nandict(dict):  # pragma: no cover
+    """Dict in which any NaN key resolves to the value stored under NaN."""
+    def __init__(self, mapping):
+        super().__init__(mapping)
+        # remember the value of the first NaN key, if the mapping has one
+        not_found = object()
+        nan_value = next(
+            (value for key, value in mapping.items() if is_scalar_nan(key)),
+            not_found,
+        )
+        if nan_value is not not_found:
+            self.nan_value = nan_value
+    def __missing__(self, key):
+        """Serve NaN lookups from ``nan_value``; anything else is a KeyError."""
+        if not hasattr(self, "nan_value") or not is_scalar_nan(key):
+            raise KeyError(key)
+        return self.nan_value
+class MissingValues(NamedTuple):  # pragma: no cover
+    """Flags recording which kinds of missing value are present."""
+    # True when a NaN is present
+    nan: bool
+    # True when ``None`` is present
+    none: bool
+    def to_list(self):
+        """Return the flagged markers as a list, ``None`` ahead of NaN."""
+        flagged = ((self.none, None), (self.nan, np.nan))
+        return [marker for present, marker in flagged if present]
+def _extract_missing(values):  # pragma: no cover
+    """Split the missing markers (``None`` / NaN) off a set of values.
+    Parameters
+    ----------
+    values: set
+        Set of values to extract missing from.
+    Returns
+    -------
+    output: set
+        The values without the missing markers. When nothing is missing
+        this is the very same set object that was passed in.
+    missing_values: MissingValues
+        Flags telling whether ``None`` and/or NaN were found.
+    """
+    missing = {item for item in values if item is None or is_scalar_nan(item)}
+    if not missing:
+        return values, MissingValues(nan=False, none=False)
+    has_none = None in missing
+    # Anything in `missing` other than None has to be float('nan') or
+    # np.nan, so NaN is present as soon as `missing` holds more entries
+    # than None accounts for.
+    has_nan = len(missing) > (1 if has_none else 0)
+    # create set without the missing values
+    remaining = values - missing
+    return remaining, MissingValues(nan=has_nan, none=has_none)
+@builtin_function
+def _value_to_int_mapper(values_data, uniques_data, check_unknown=True):
+    """Translate each entry of ``values_data`` into its position in ``uniques_data``.
+    Parameters
+    ----------
+    values_data : ndarray
+        Values to encode.
+    uniques_data : ndarray
+        Known unique values. For dtypes other than object/str/bytes the
+        positions come from ``np.searchsorted``, which requires them sorted.
+    check_unknown : bool, default=True
+        For dtypes other than object/str/bytes, verify first that every
+        value is known. Object/str/bytes values are always verified.
+    Returns
+    -------
+    ndarray
+        Integer codes, one per entry of ``values_data``.
+    Raises
+    ------
+    ValueError
+        If a value does not occur in ``uniques_data``.
+    """
+    if values_data.dtype.kind in "OUS":
+        table = _nandict({val: i for i, val in enumerate(uniques_data)})
+        try:
+            # Pin the dtype: without it an empty chunk would come back as
+            # float64, and the platform default int may be narrower than
+            # the int64 that `_map_to_integer` declares for the result.
+            return np.array([table[v] for v in values_data], dtype=np.int64)
+        except KeyError as e:
+            raise ValueError(f"y contains previously unseen labels: {str(e)}") from e
+    if check_unknown:
+        diff = _check_unknown(values_data, uniques_data)
+        if diff:
+            raise ValueError(f"y contains previously unseen labels: {str(diff)}")
+    return np.searchsorted(uniques_data, values_data)
+def _map_to_integer(values, uniques, check_unknown=True):
+    """Encode ``values`` chunk by chunk as int64 positions within ``uniques``."""
+    apply_chunk = values.mf.apply_chunk
+    chunk_kwargs = dict(
+        args=(uniques,),
+        dtype=np.dtype(np.int64),
+        shape=values.shape,
+        check_unknown=check_unknown,
+    )
+    return apply_chunk(_value_to_int_mapper, **chunk_kwargs)
+def _check_unknown(values, known_values, return_mask=False):  # pragma: no cover
+    """
+    Helper function to check for unknowns in values to be encoded.
+    Uses pure python method for object, str and bytes dtypes, and numpy
+    method for all other dtypes.
+    Parameters
+    ----------
+    values : array
+        Values to check for unknowns.
+    known_values : array
+        Known values. Must be unique.
+    return_mask : bool, default=False
+        If True, return a mask of the same shape as `values` indicating
+        the valid values.
+    Returns
+    -------
+    diff : list
+        The unique values present in `values` and not in `known_values`.
+    valid_mask : boolean array
+        Additionally returned if ``return_mask=True``.
+    """
+    valid_mask = None
+    if values.dtype.kind in "OUS":
+        values_set = set(values)
+        values_set, missing_in_values = _extract_missing(values_set)
+        uniques_set = set(known_values)
+        uniques_set, missing_in_uniques = _extract_missing(uniques_set)
+        diff = values_set - uniques_set
+        # a missing marker is unknown only when the known values lack it
+        nan_in_diff = missing_in_values.nan and not missing_in_uniques.nan
+        none_in_diff = missing_in_values.none and not missing_in_uniques.none
+        def is_valid(value):
+            return (
+                value in uniques_set
+                or (missing_in_uniques.none and value is None)
+                or (missing_in_uniques.nan and is_scalar_nan(value))
+            )
+        if return_mask:
+            if diff or nan_in_diff or none_in_diff:
+                valid_mask = np.array([is_valid(value) for value in values])
+            else:
+                valid_mask = np.ones(len(values), dtype=bool)
+        diff = list(diff)
+        if none_in_diff:
+            diff.append(None)
+        if nan_in_diff:
+            diff.append(np.nan)
+    else:
+        unique_values = np.unique(values)
+        diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
+        if return_mask:
+            if diff.size:
+                # np.isin supersedes np.in1d, deprecated since NumPy 2.0
+                valid_mask = np.isin(values, known_values)
+            else:
+                valid_mask = np.ones(len(values), dtype=bool)
+        # check for nans in the known_values
+        if np.isnan(known_values).any():
+            diff_is_nan = np.isnan(diff)
+            if diff_is_nan.any():
+                # NaN never compares equal, so it was reported as unknown
+                # although it is known: mark the NaN entries valid again
+                if diff.size and return_mask:
+                    is_nan = np.isnan(values)
+                    valid_mask[is_nan] = 1
+                # remove nan from diff
+                diff = diff[~diff_is_nan]
+        diff = list(diff)
+    if return_mask:
+        return diff, valid_mask
+    return diff
+def _encode(values, *, uniques, check_unknown=True):
+    """Encode ``values`` into ``[0, n_uniques - 1]``.
+    This is a thin wrapper that hands all the work to ``_map_to_integer``.
+    For non-object dtypes `uniques` has to be sorted already; this is
+    assumed, not checked, so the caller is responsible for it.
+    Parameters
+    ----------
+    values : tensor
+        Values to encode.
+    uniques : tensor
+        The unique values in `values`. If the dtype is not object, then
+        `uniques` needs to be sorted.
+    check_unknown : bool, default=True
+        If True, check for values in `values` that are not in `unique`
+        and raise an error. This is ignored for object dtype, and treated as
+        True in this case. This parameter is useful for
+        _BaseEncoder._transform() to avoid calling _check_unknown()
+        twice.
+    Returns
+    -------
+    encoded : tensor
+        Encoded values
+    """
+    encoded = _map_to_integer(values, uniques, check_unknown=check_unknown)
+    return encoded

maxframe/learn/utils/checks.py ADDED Viewed

@@ -0,0 +1,161 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+import numpy as np
+from ... import opcodes
+from ... import tensor as mt
+from ...config import options
+from ...core import ENTITY_TYPE, EntityData, OutputType, get_output_types
+from ...core.operator import Operator, OperatorStage
+from ...serialization.serializables import (
+    BoolField,
+    DataTypeField,
+    KeyField,
+    StringField,
+)
+from ...tensor.core import TensorOrder
+from ..core import LearnOperatorMixin
+try:
+    from sklearn import get_config as get_sklearn_config
+except ImportError:  # pragma: no cover
+    get_sklearn_config = None
+class CheckBase(Operator, LearnOperatorMixin):
+    """Base operator pairing an ``input`` to check with a ``value`` to output."""
+    # the entity being checked
+    input = KeyField("input")
+    # the entity whose params describe the output
+    value = KeyField("value", default=None)
+    # optional error message
+    err_msg = StringField("err_msg", default=None)
+    def __init__(self, output_types=None, **kw):
+        super().__init__(_output_types=output_types, **kw)
+    @classmethod
+    def _set_inputs(cls, op: "CheckBase", inputs: List[EntityData]):
+        """Rebind ``input`` / ``value`` to the first / last registered input."""
+        super()._set_inputs(op, inputs)
+        for attr, position in (("input", 0), ("value", -1)):
+            if getattr(op, attr) is not None:
+                setattr(op, attr, op._inputs[position])
+    def __call__(self, x, value=None):
+        """Create the output tileable from ``x`` and ``value``.
+        When ``value`` is omitted, ``x`` itself takes its place.
+        """
+        if value is None:
+            value = x
+        self.value = value
+        self.output_types = get_output_types(value)
+        self.stage = OperatorStage.agg
+        return self.new_tileable([x, value], kws=[value.params])
+class CheckNonNegative(CheckBase):
+    """Check operator registered under ``opcodes.CHECK_NON_NEGATIVE``."""
+    _op_type_ = opcodes.CHECK_NON_NEGATIVE
+    # name used in the default error message
+    whom = StringField("whom", default=None)
+    def __init__(self, gpu=None, output_types=None, **kw):
+        super().__init__(output_types=output_types, gpu=gpu, **kw)
+        # derive a default message from `whom` unless one was supplied
+        needs_default_msg = self.err_msg is None and self.whom is not None
+        if needs_default_msg:
+            self.err_msg = f"Negative values in data passed to {self.whom}"
+def check_non_negative_then_return_value(to_check, value, whom):
+    """Build a ``CheckNonNegative`` operator on ``to_check`` and apply it.
+    ``whom`` is stored on the operator, which uses it to build its default
+    error message.
+    """
+    op_kwargs = dict(
+        input=to_check,
+        value=value,
+        whom=whom,
+        sparse=to_check.is_sparse(),
+    )
+    return CheckNonNegative(**op_kwargs)(to_check, value)
+class AssertAllFinite(Operator, LearnOperatorMixin):
+    """Operator registered under ``opcodes.ASSERT_ALL_FINITE``.
+    Calling it on ``x`` builds the output tileable, unless finiteness is
+    assumed by configuration, in which case no tileable is created.
+    """
+    _op_type_ = opcodes.ASSERT_ALL_FINITE
+    x = KeyField("x")
+    allow_nan = BoolField("allow_nan", default=None)
+    msg_dtype = DataTypeField("msg_dtype", default=None)
+    check_only = BoolField("check_only", default=None)
+    input_name = StringField("input_name", default=None)
+    # chunks
+    is_finite = KeyField("is_finite", default=None)
+    check_nan = KeyField("check_nan", default=None)
+    def __init__(self, x=None, output_types=None, **kw):
+        super().__init__(x=x, _output_types=output_types, **kw)
+    @classmethod
+    def _set_inputs(cls, op: "AssertAllFinite", inputs: List[EntityData]):
+        """Rebind every key field that is set to the next input, in order."""
+        super()._set_inputs(op, inputs)
+        remaining = iter(op.inputs)
+        for field_name in ("x", "is_finite", "check_nan"):
+            if getattr(op, field_name) is None:
+                continue
+            setattr(op, field_name, next(remaining))
+    @classmethod
+    def _assume_finite(cls):
+        """Resolve ``assume_finite``: own option, else scikit-learn, else False."""
+        flag = options.learn.assume_finite
+        if flag is None and get_sklearn_config is not None:
+            # get config from scikit-learn
+            flag = get_sklearn_config()["assume_finite"]
+        return False if flag is None else flag
+    def __call__(self, x):
+        """Return the check tileable, or short-circuit when finiteness is assumed.
+        With finiteness assumed the result is ``None`` when ``check_only``
+        is set and ``x`` itself otherwise.
+        """
+        if self._assume_finite():
+            # skip check
+            return None if self.check_only else x
+        if not self.check_only:
+            return self.new_tileable([x], kws=[x.params])
+        return self.new_tileable(
+            [x], dtype=np.dtype(bool), shape=(), order=TensorOrder.C_ORDER
+        )
+def assert_all_finite(
+    X, allow_nan=False, msg_dtype=None, check_only=True, input_name=""
+):
+    """Wrap ``X`` in an ``AssertAllFinite`` operator and call it.
+    ``X`` is converted with ``mt.asarray`` unless it already is an entity.
+    An ``X`` that was itself produced by ``AssertAllFinite`` with the same
+    ``allow_nan``, ``msg_dtype`` and ``check_only`` is returned as is.
+    With ``check_only`` the operator is declared as a dense tensor output;
+    otherwise it takes the output types and sparseness of ``X``.
+    """
+    if not isinstance(X, ENTITY_TYPE):
+        X = mt.asarray(X)
+    already_checked = (
+        isinstance(X.op, AssertAllFinite)
+        and X.op.allow_nan == allow_nan
+        and X.op.msg_dtype == msg_dtype
+        and X.op.check_only == check_only
+    )
+    if already_checked:
+        return X
+    if check_only:
+        output_types, sparse = [OutputType.tensor], False
+    else:
+        output_types, sparse = get_output_types(X), X.issparse()
+    checker = AssertAllFinite(
+        x=X,
+        allow_nan=allow_nan,
+        msg_dtype=msg_dtype,
+        check_only=check_only,
+        sparse=sparse,
+        output_types=output_types,
+        input_name=input_name,
+    )
+    return checker(X)

maxframe/learn/utils/core.py CHANGED Viewed

@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import math
+import numbers
 import pandas as pd
 from ...dataframe import DataFrame, Series
@@ -27,3 +30,33 @@ def convert_to_tensor_or_dataframe(item):
     else:
         item = astensor(item)
     return item
+def is_scalar_nan(x):
+    """Tests if x is NaN.
+    This function is meant to overcome the issue that np.isnan does not allow
+    non-numerical types as input, and that np.nan is not float('nan').
+    Integers are never NaN and are rejected before ``math.isnan`` is
+    reached: that function converts its argument to float, which raises
+    ``OverflowError`` for integers too large to be represented.
+    Parameters
+    ----------
+    x : any type
+    Returns
+    -------
+    boolean
+    Examples
+    --------
+    >>> is_scalar_nan(np.nan)
+    True
+    >>> is_scalar_nan(float("nan"))
+    True
+    >>> is_scalar_nan(None)
+    False
+    >>> is_scalar_nan("")
+    False
+    >>> is_scalar_nan([np.nan])
+    False
+    >>> is_scalar_nan(10 ** 400)
+    False
+    """
+    if isinstance(x, numbers.Integral):
+        return False
+    return isinstance(x, numbers.Real) and math.isnan(x)