maxframe 1.3.0__cp311-cp311-macosx_10_9_universal2.whl → 2.0.0b1__cp311-cp311-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +62 -9
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +128 -3
- maxframe/dataframe/reduction/core.py +20 -6
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/models/dashscope.py +34 -0
- maxframe/learn/contrib/llm/models/managed.py +15 -0
- maxframe/learn/contrib/llm/multi_modal.py +92 -0
- maxframe/learn/contrib/llm/text.py +21 -5
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +13 -8
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.0.dist-info/RECORD +0 -705
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -14,11 +14,11 @@
|
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
16
|
from collections import OrderedDict
|
|
17
|
+
from typing import List
|
|
17
18
|
|
|
18
19
|
from .... import opcodes
|
|
19
|
-
from ....core import OutputType
|
|
20
|
-
from ....core.operator
|
|
21
|
-
from ....core.operator.core import TileableOperatorMixin
|
|
20
|
+
from ....core import EntityData, OutputType
|
|
21
|
+
from ....core.operator import ObjectOperator, ObjectOperatorMixin
|
|
22
22
|
from ....serialization.serializables import (
|
|
23
23
|
AnyField,
|
|
24
24
|
BoolField,
|
|
@@ -29,7 +29,9 @@ from ....serialization.serializables import (
|
|
|
29
29
|
KeyField,
|
|
30
30
|
ListField,
|
|
31
31
|
)
|
|
32
|
-
from
|
|
32
|
+
from ..models import to_remote_model
|
|
33
|
+
from ..utils import TrainingCallback
|
|
34
|
+
from .core import Booster, BoosterData, XGBScikitLearnBase
|
|
33
35
|
from .dmatrix import ToDMatrix, to_dmatrix
|
|
34
36
|
|
|
35
37
|
logger = logging.getLogger(__name__)
|
|
@@ -41,7 +43,7 @@ def _on_serialize_evals(evals_val):
|
|
|
41
43
|
return [list(x) for x in evals_val]
|
|
42
44
|
|
|
43
45
|
|
|
44
|
-
class XGBTrain(
|
|
46
|
+
class XGBTrain(ObjectOperator, ObjectOperatorMixin):
|
|
45
47
|
_op_type_ = opcodes.XGBOOST_TRAIN
|
|
46
48
|
|
|
47
49
|
params = DictField("params", key_type=FieldTypes.string, default=None)
|
|
@@ -52,9 +54,12 @@ class XGBTrain(Operator, TileableOperatorMixin):
|
|
|
52
54
|
maximize = BoolField("maximize", default=None)
|
|
53
55
|
early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
|
|
54
56
|
verbose_eval = AnyField("verbose_eval", default=None)
|
|
55
|
-
xgb_model =
|
|
57
|
+
xgb_model = KeyField("xgb_model", default=None)
|
|
56
58
|
callbacks = ListField(
|
|
57
|
-
"callbacks",
|
|
59
|
+
"callbacks",
|
|
60
|
+
field_type=FunctionField.field_type,
|
|
61
|
+
default=None,
|
|
62
|
+
on_serialize=TrainingCallback.from_local,
|
|
58
63
|
)
|
|
59
64
|
custom_metric = FunctionField("custom_metric", default=None)
|
|
60
65
|
num_boost_round = Int64Field("num_boost_round", default=10)
|
|
@@ -67,21 +72,35 @@ class XGBTrain(Operator, TileableOperatorMixin):
|
|
|
67
72
|
if self.has_evals_result:
|
|
68
73
|
self.output_types.append(OutputType.object)
|
|
69
74
|
|
|
70
|
-
def
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
75
|
+
def has_custom_code(self) -> bool:
|
|
76
|
+
if not self.callbacks:
|
|
77
|
+
return False
|
|
78
|
+
return any(
|
|
79
|
+
not isinstance(cb, TrainingCallback) or cb.has_custom_code()
|
|
80
|
+
for cb in self.callbacks
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
@classmethod
|
|
84
|
+
def _set_inputs(cls, op: "XGBTrain", inputs: List[EntityData]):
|
|
85
|
+
super()._set_inputs(op, inputs)
|
|
86
|
+
input_it = iter(op._inputs)
|
|
87
|
+
op.dtrain = next(input_it)
|
|
88
|
+
if op.evals:
|
|
89
|
+
evals_dict = OrderedDict(op.evals)
|
|
76
90
|
new_evals_dict = OrderedDict()
|
|
77
|
-
for
|
|
91
|
+
for val in evals_dict.values():
|
|
92
|
+
new_key = next(input_it)
|
|
78
93
|
new_evals_dict[new_key] = val
|
|
79
|
-
|
|
94
|
+
op.evals = list(new_evals_dict.items())
|
|
95
|
+
if op.xgb_model:
|
|
96
|
+
op.xgb_model = next(input_it)
|
|
80
97
|
|
|
81
98
|
def __call__(self, evals_result):
|
|
82
99
|
inputs = [self.dtrain]
|
|
83
100
|
if self.has_evals_result:
|
|
84
101
|
inputs.extend(e[0] for e in self.evals)
|
|
102
|
+
if self.xgb_model is not None:
|
|
103
|
+
inputs.append(self.xgb_model)
|
|
85
104
|
kws = [{"object_class": Booster}, {}]
|
|
86
105
|
return self.new_tileables(inputs, kws=kws, evals_result=evals_result)[0]
|
|
87
106
|
|
|
@@ -94,7 +113,28 @@ class XGBTrain(Operator, TileableOperatorMixin):
|
|
|
94
113
|
return self.evals
|
|
95
114
|
|
|
96
115
|
|
|
97
|
-
def
|
|
116
|
+
def _get_xgb_booster(xgb_model):
|
|
117
|
+
import xgboost
|
|
118
|
+
|
|
119
|
+
if isinstance(xgb_model, (XGBScikitLearnBase, xgboost.XGBModel)):
|
|
120
|
+
xgb_model = xgb_model.get_booster()
|
|
121
|
+
|
|
122
|
+
if isinstance(xgb_model, (Booster, BoosterData)):
|
|
123
|
+
return xgb_model
|
|
124
|
+
elif isinstance(xgb_model, xgboost.Booster):
|
|
125
|
+
return to_remote_model(xgb_model, model_cls=Booster)
|
|
126
|
+
raise ValueError(f"Cannot use {type(xgb_model)} as xgb_model")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def train(
|
|
130
|
+
params,
|
|
131
|
+
dtrain,
|
|
132
|
+
evals=None,
|
|
133
|
+
evals_result=None,
|
|
134
|
+
xgb_model=None,
|
|
135
|
+
num_class=None,
|
|
136
|
+
**kwargs,
|
|
137
|
+
):
|
|
98
138
|
"""
|
|
99
139
|
Train XGBoost model in MaxFrame manner.
|
|
100
140
|
|
|
@@ -120,11 +160,14 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
|
|
|
120
160
|
processed_evals.append((eval_dmatrix, name))
|
|
121
161
|
else:
|
|
122
162
|
processed_evals.append((to_dmatrix(eval_dmatrix), name))
|
|
163
|
+
if xgb_model is not None:
|
|
164
|
+
xgb_model = _get_xgb_booster(xgb_model)
|
|
123
165
|
data = XGBTrain(
|
|
124
166
|
params=params,
|
|
125
167
|
dtrain=dtrain,
|
|
126
168
|
evals=processed_evals,
|
|
127
169
|
evals_result=evals_result,
|
|
170
|
+
xgb_model=xgb_model,
|
|
128
171
|
num_class=num_class,
|
|
129
172
|
**kwargs,
|
|
130
173
|
)(evals_result)
|
maxframe/learn/core.py
CHANGED
|
@@ -12,7 +12,21 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import functools
|
|
16
|
+
import importlib
|
|
17
|
+
import inspect
|
|
18
|
+
from typing import Type
|
|
19
|
+
|
|
20
|
+
from ..core import ENTITY_TYPE
|
|
15
21
|
from ..core.entity.objects import Object, ObjectData
|
|
22
|
+
from ..core.operator import TileableOperatorMixin
|
|
23
|
+
from ..session import execute as execute_tileables
|
|
24
|
+
from ..session import fetch as fetch_tileables
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from sklearn.base import BaseEstimator as SkBaseEstimator
|
|
28
|
+
except ImportError:
|
|
29
|
+
SkBaseEstimator = object
|
|
16
30
|
|
|
17
31
|
|
|
18
32
|
class ModelData(ObjectData):
|
|
@@ -24,3 +38,241 @@ class Model(Object):
|
|
|
24
38
|
|
|
25
39
|
|
|
26
40
|
MODEL_TYPE = (Model, ModelData)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LearnOperatorMixin(TileableOperatorMixin):
|
|
44
|
+
_op_module_ = "learn"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@functools.lru_cache(100)
|
|
48
|
+
def _get_sklearn_estimator_cls(estimator_cls: Type["BaseEstimator"]):
|
|
49
|
+
mod_path = estimator_cls.__module__.replace("maxframe.learn", "sklearn").split(".")
|
|
50
|
+
mod_path = ".".join(p for p in mod_path if not p.startswith("_"))
|
|
51
|
+
|
|
52
|
+
exc = ValueError
|
|
53
|
+
while mod_path.startswith("sklearn."):
|
|
54
|
+
try:
|
|
55
|
+
mod = importlib.import_module(mod_path)
|
|
56
|
+
return getattr(mod, estimator_cls.__name__)
|
|
57
|
+
except (AttributeError, ImportError) as ex:
|
|
58
|
+
exc = ex
|
|
59
|
+
mod_path = mod_path.rsplit(".", 1)[0]
|
|
60
|
+
raise exc
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class BaseEstimator(SkBaseEstimator):
|
|
64
|
+
_data_attributes = []
|
|
65
|
+
|
|
66
|
+
def _get_data_attributes(self):
|
|
67
|
+
return self._data_attributes or [
|
|
68
|
+
attr
|
|
69
|
+
for attr in dir(self)
|
|
70
|
+
if not attr.startswith("_") and attr.endswith("_")
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
def _get_sklearn_cls(self):
|
|
74
|
+
return _get_sklearn_estimator_cls(type(self))
|
|
75
|
+
|
|
76
|
+
def _validate_data(
|
|
77
|
+
self, X, y=None, reset=True, validate_separately=False, **check_params
|
|
78
|
+
):
|
|
79
|
+
"""Validate input data and set or check the `n_features_in_` attribute.
|
|
80
|
+
|
|
81
|
+
Parameters
|
|
82
|
+
----------
|
|
83
|
+
X : {array-like, sparse matrix, dataframe} of shape \
|
|
84
|
+
(n_samples, n_features)
|
|
85
|
+
The input samples.
|
|
86
|
+
y : array-like of shape (n_samples,), default=None
|
|
87
|
+
The targets. If None, `check_array` is called on `X` and
|
|
88
|
+
`check_X_y` is called otherwise.
|
|
89
|
+
reset : bool, default=True
|
|
90
|
+
Whether to reset the `n_features_in_` attribute.
|
|
91
|
+
If False, the input will be checked for consistency with data
|
|
92
|
+
provided when reset was last True.
|
|
93
|
+
validate_separately : False or tuple of dicts, default=False
|
|
94
|
+
Only used if y is not None.
|
|
95
|
+
If False, call validate_X_y(). Else, it must be a tuple of kwargs
|
|
96
|
+
to be used for calling check_array() on X and y respectively.
|
|
97
|
+
**check_params : kwargs
|
|
98
|
+
Parameters passed to :func:`sklearn.utils.check_array` or
|
|
99
|
+
:func:`sklearn.utils.check_X_y`. Ignored if validate_separately
|
|
100
|
+
is not False.
|
|
101
|
+
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
out : tensor or tuple of these
|
|
105
|
+
The validated input. A tuple is returned if `y` is not None.
|
|
106
|
+
"""
|
|
107
|
+
from .utils.validation import check_array, check_X_y
|
|
108
|
+
|
|
109
|
+
if y is None:
|
|
110
|
+
if hasattr(self, "_get_tags") and self._get_tags().get(
|
|
111
|
+
"requires_y", False
|
|
112
|
+
): # pragma: no cover
|
|
113
|
+
raise ValueError(
|
|
114
|
+
f"This {type(self).__name__} estimator requires y to be passed, "
|
|
115
|
+
"but the target y is None."
|
|
116
|
+
)
|
|
117
|
+
X = check_array(X, **check_params)
|
|
118
|
+
out = X
|
|
119
|
+
elif isinstance(y, str) and y == "no_validation":
|
|
120
|
+
X = check_array(X, **check_params)
|
|
121
|
+
out = X
|
|
122
|
+
else: # pragma: no cover
|
|
123
|
+
if validate_separately:
|
|
124
|
+
# We need this because some estimators validate X and y
|
|
125
|
+
# separately, and in general, separately calling check_array()
|
|
126
|
+
# on X and y isn't equivalent to just calling check_X_y()
|
|
127
|
+
# :(
|
|
128
|
+
check_X_params, check_y_params = validate_separately
|
|
129
|
+
X = check_array(X, **check_X_params)
|
|
130
|
+
y = check_array(y, **check_y_params)
|
|
131
|
+
else:
|
|
132
|
+
X, y = check_X_y(X, y, **check_params)
|
|
133
|
+
out = X, y
|
|
134
|
+
|
|
135
|
+
if check_params.get("ensure_2d", True) and hasattr(self, "_check_n_features"):
|
|
136
|
+
self._check_n_features(X, reset=reset)
|
|
137
|
+
|
|
138
|
+
return out
|
|
139
|
+
|
|
140
|
+
def execute(self, session=None, run_kwargs=None, extra_tileables=None):
|
|
141
|
+
from .utils.validation import check_is_fitted
|
|
142
|
+
|
|
143
|
+
check_is_fitted(self)
|
|
144
|
+
|
|
145
|
+
if extra_tileables is not None and not isinstance(
|
|
146
|
+
extra_tileables, (list, tuple)
|
|
147
|
+
):
|
|
148
|
+
extra_tileables = [extra_tileables]
|
|
149
|
+
extra_tileables = list(extra_tileables or [])
|
|
150
|
+
|
|
151
|
+
attrs = [getattr(self, attr, None) for attr in self._get_data_attributes()]
|
|
152
|
+
attrs = [a for a in attrs + extra_tileables if isinstance(a, ENTITY_TYPE)]
|
|
153
|
+
execute_tileables(*attrs, session=session, run_kwargs=run_kwargs)
|
|
154
|
+
return self
|
|
155
|
+
|
|
156
|
+
def fetch(self, session=None, run_kwargs=None):
|
|
157
|
+
from .utils.validation import check_is_fitted
|
|
158
|
+
|
|
159
|
+
check_is_fitted(self)
|
|
160
|
+
|
|
161
|
+
regressor_cls = self._get_sklearn_cls()
|
|
162
|
+
cls_init_args = inspect.getfullargspec(regressor_cls.__init__)
|
|
163
|
+
cls_args = cls_init_args.args[1:] + cls_init_args.kwonlyargs
|
|
164
|
+
init_kw = {k: getattr(self, k, None) for k in cls_args}
|
|
165
|
+
init_kw = {k: v for k, v in init_kw.items() if v is not None}
|
|
166
|
+
regressor = regressor_cls(**init_kw)
|
|
167
|
+
|
|
168
|
+
attrs = [
|
|
169
|
+
(attr, getattr(self, attr, None)) for attr in self._get_data_attributes()
|
|
170
|
+
]
|
|
171
|
+
attrs = [tp for tp in attrs if tp[-1] is not None]
|
|
172
|
+
ent_attrs = [tp for tp in attrs if isinstance(tp[-1], ENTITY_TYPE)]
|
|
173
|
+
ent_attr_keys, ent_attr_vals = [list(x) for x in zip(*ent_attrs)]
|
|
174
|
+
|
|
175
|
+
ent_attr_vals = fetch_tileables(
|
|
176
|
+
*ent_attr_vals, session=session, run_kwargs=run_kwargs
|
|
177
|
+
)
|
|
178
|
+
if len(ent_attr_keys) == 1:
|
|
179
|
+
ent_attr_vals = (ent_attr_vals,)
|
|
180
|
+
|
|
181
|
+
attr_dict = dict(attrs)
|
|
182
|
+
attr_dict.update(zip(ent_attr_keys, ent_attr_vals))
|
|
183
|
+
for k, v in attr_dict.items():
|
|
184
|
+
setattr(regressor, k, v)
|
|
185
|
+
return regressor
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class TransformerMixin:
|
|
189
|
+
"""Mixin class for all transformers in scikit-learn."""
|
|
190
|
+
|
|
191
|
+
def fit_transform(self, X, y=None, **fit_params):
|
|
192
|
+
"""
|
|
193
|
+
Fit to data, then transform it.
|
|
194
|
+
|
|
195
|
+
Fits transformer to `X` and `y` with optional parameters `fit_params`
|
|
196
|
+
and returns a transformed version of `X`.
|
|
197
|
+
|
|
198
|
+
Parameters
|
|
199
|
+
----------
|
|
200
|
+
X : array-like of shape (n_samples, n_features)
|
|
201
|
+
Input samples.
|
|
202
|
+
|
|
203
|
+
y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
|
|
204
|
+
default=None
|
|
205
|
+
Target values (None for unsupervised transformations).
|
|
206
|
+
|
|
207
|
+
**fit_params : dict
|
|
208
|
+
Additional fit parameters.
|
|
209
|
+
|
|
210
|
+
Returns
|
|
211
|
+
-------
|
|
212
|
+
X_new : ndarray array of shape (n_samples, n_features_new)
|
|
213
|
+
Transformed array.
|
|
214
|
+
"""
|
|
215
|
+
# non-optimized default implementation; override when a better
|
|
216
|
+
# method is possible for a given clustering algorithm
|
|
217
|
+
if y is None:
|
|
218
|
+
# fit method of arity 1 (unsupervised transformation)
|
|
219
|
+
return self.fit(X, **fit_params).transform(X)
|
|
220
|
+
else:
|
|
221
|
+
# fit method of arity 2 (supervised transformation)
|
|
222
|
+
return self.fit(X, y, **fit_params).transform(X)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class RegressorMixin:
|
|
226
|
+
"""Mixin class for all regression estimators in scikit-learn."""
|
|
227
|
+
|
|
228
|
+
_estimator_type = "regressor"
|
|
229
|
+
|
|
230
|
+
def score(self, X, y, sample_weight=None):
|
|
231
|
+
"""Return the coefficient of determination :math:`R^2` of the
|
|
232
|
+
prediction.
|
|
233
|
+
|
|
234
|
+
The coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,
|
|
235
|
+
where :math:`u` is the residual sum of squares ``((y_true - y_pred)
|
|
236
|
+
** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -
|
|
237
|
+
y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it
|
|
238
|
+
can be negative (because the model can be arbitrarily worse). A
|
|
239
|
+
constant model that always predicts the expected value of `y`,
|
|
240
|
+
disregarding the input features, would get a :math:`R^2` score of
|
|
241
|
+
0.0.
|
|
242
|
+
|
|
243
|
+
Parameters
|
|
244
|
+
----------
|
|
245
|
+
X : array-like of shape (n_samples, n_features)
|
|
246
|
+
Test samples. For some estimators this may be a precomputed
|
|
247
|
+
kernel matrix or a list of generic objects instead with shape
|
|
248
|
+
``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
|
|
249
|
+
is the number of samples used in the fitting for the estimator.
|
|
250
|
+
|
|
251
|
+
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
|
252
|
+
True values for `X`.
|
|
253
|
+
|
|
254
|
+
sample_weight : array-like of shape (n_samples,), default=None
|
|
255
|
+
Sample weights.
|
|
256
|
+
|
|
257
|
+
Returns
|
|
258
|
+
-------
|
|
259
|
+
score : Tensor
|
|
260
|
+
:math:`R^2` of ``self.predict(X)`` wrt. `y`.
|
|
261
|
+
|
|
262
|
+
Notes
|
|
263
|
+
-----
|
|
264
|
+
The :math:`R^2` score used when calling ``score`` on a regressor uses
|
|
265
|
+
``multioutput='uniform_average'`` from version 0.23 to keep consistent
|
|
266
|
+
with default value of :func:`~sklearn.metrics.r2_score`.
|
|
267
|
+
This influences the ``score`` method of all the multioutput
|
|
268
|
+
regressors (except for
|
|
269
|
+
:class:`~sklearn.multioutput.MultiOutputRegressor`).
|
|
270
|
+
"""
|
|
271
|
+
|
|
272
|
+
from .metrics import r2_score
|
|
273
|
+
|
|
274
|
+
y_pred = self.predict(X)
|
|
275
|
+
return r2_score(y, y_pred, sample_weight=sample_weight)
|
|
276
|
+
|
|
277
|
+
def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
|
|
278
|
+
return {"requires_y": True}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .samples_generator import (
|
|
16
|
+
make_blobs,
|
|
17
|
+
make_classification,
|
|
18
|
+
make_low_rank_matrix,
|
|
19
|
+
make_regression,
|
|
20
|
+
)
|