maxframe 1.3.1__cp310-cp310-win_amd64.whl → 2.0.0b1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp310-win_amd64.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from .... import opcodes
|
|
18
|
+
from ....core import EntityData
|
|
19
|
+
from ....core.entity.output_types import get_output_types
|
|
20
|
+
from ....core.operator.base import Operator
|
|
21
|
+
from ....core.operator.core import TileableOperatorMixin
|
|
22
|
+
from ....dataframe.core import DATAFRAME_TYPE
|
|
23
|
+
from ....serialization.serializables import BoolField, KeyField, ListField
|
|
24
|
+
from ....serialization.serializables.field import AnyField
|
|
25
|
+
from ....tensor import tensor as astensor
|
|
26
|
+
from ....tensor.core import TENSOR_TYPE
|
|
27
|
+
from ....typing_ import TileableType
|
|
28
|
+
from ...utils import convert_to_tensor_or_dataframe
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ToLGBMDataset(Operator, TileableOperatorMixin):
    """Operator that wraps data and optional label / weight / init_score
    tileables, together with plain dataset options, into a single tileable.

    The output tileable copies the shape / dtype metadata of ``data``
    (see ``_get_kw`` and ``__call__``).
    """

    _op_type_ = opcodes.TO_LGBM_DATASET

    # Inputs that are re-bound to the operator's inputs in ``_set_inputs``.
    data = KeyField("data", default=None)
    label = KeyField("label", default=None)
    # NOTE(review): ``reference`` is declared as a KeyField but is neither added
    # to the inputs in ``__call__`` nor re-bound in ``_set_inputs`` -- confirm
    # this is intended.
    reference = KeyField("reference", default=None)
    weight = KeyField("weight", default=None)
    init_score = KeyField("init_score", default=None)
    # Plain option values stored on the operator.
    group = AnyField("group", default=None)
    feature_name = ListField("feature_name", default=None)
    categorical_feature = ListField("categorical_feature", default=None)
    params = AnyField("params", default=None)
    free_raw_data = BoolField("free_raw_data", default=None)
    position = AnyField("position", default=None)
    # if to collocate the data, label and weight
    collocate = BoolField("collocate", default=False)

    @property
    def output_limit(self):
        """Number of outputs: 1, plus one per present optional input when
        ``collocate`` is set."""
        if not self.collocate:
            return 1
        # Test presence with ``is not None`` (as ``_set_inputs`` and
        # ``__call__`` do) instead of relying on the truthiness of a tileable.
        optional_inputs = (self.label, self.weight, self.init_score)
        return 1 + sum(x is not None for x in optional_inputs)

    @classmethod
    def _set_inputs(cls, op: "ToLGBMDataset", inputs: List[EntityData]):
        """Re-bind the key fields of ``op`` to the entries of ``op._inputs``.

        The positions mirror the order used in ``__call__``:
        data, then label, then weight, then init_score (always last).
        """
        super()._set_inputs(op, inputs)
        if op.data is not None:
            op.data = op._inputs[0]
        has_label = op.label is not None
        if has_label:
            op.label = op._inputs[1]
        if op.weight is not None:
            # weight directly follows data unless a label sits in between
            i = 1 if not has_label else 2
            op.weight = op._inputs[i]
        if op.init_score is not None:
            op.init_score = op._inputs[-1]

    @staticmethod
    def _get_kw(obj):
        """Return the metadata keyword arguments describing ``obj``.

        Tensor-typed objects yield shape / dtype / order; anything else is
        treated as dataframe-like and yields shape / dtypes / index_value /
        columns_value.
        """
        if isinstance(obj, TENSOR_TYPE):
            return {"shape": obj.shape, "dtype": obj.dtype, "order": obj.order}
        else:
            return {
                "shape": obj.shape,
                "dtypes": obj.dtypes,
                "index_value": obj.index_value,
                "columns_value": obj.columns_value,
            }

    def __call__(self):
        """Create the output tileable from data and the present optional inputs."""
        inputs = [self.data]
        kw = self._get_kw(self.data)
        # Order matters: ``_set_inputs`` relies on it to locate each input.
        if self.label is not None:
            inputs.append(self.label)
        if self.weight is not None:
            inputs.append(self.weight)
        if self.init_score is not None:
            inputs.append(self.init_score)

        return self.new_tileable(inputs, **kw)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def check_data(data):
    """Convert ``data`` to a tensor or dataframe and require it to be 2-d.

    Raises:
        ValueError: if the converted object is not two-dimensional.
    """
    converted = convert_to_tensor_or_dataframe(data)
    if converted.ndim == 2:
        return converted

    raise ValueError(f"Expecting 2-d data, got: {converted.ndim}-d")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def check_array_like(y: TileableType, name: str) -> TileableType:
    """Convert an optional array-like into a tensor.

    ``None`` is passed through unchanged. A dataframe input is reduced to its
    first column before being turned into a tensor. ``name`` is accepted but
    not used by this function.
    """
    if y is None:
        return None

    converted = convert_to_tensor_or_dataframe(y)
    # dataframes contribute only their first column
    source = converted.iloc[:, 0] if isinstance(converted, DATAFRAME_TYPE) else converted
    return astensor(source)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def to_lgbm_dataset(
    data,
    label=None,
    reference=None,
    weight=None,
    group=None,
    init_score=None,
    feature_name="auto",
    categorical_feature="auto",
    params=None,
    free_raw_data=True,
    position=None,
):
    """Build a ``ToLGBMDataset`` tileable from the given inputs.

    ``data`` must convert to a 2-d tensor or dataframe (``check_data``);
    ``label``, ``weight`` and ``init_score`` are converted with
    ``check_array_like`` when given. The string ``"auto"`` for
    ``feature_name`` / ``categorical_feature`` is stored as ``None`` on the
    operator; any other value is stored as passed.

    Raises:
        ValueError: if ``data`` is not 2-d or ``weight`` has more than one
            dimension.
    """
    data = check_data(data)
    label = check_array_like(label, "label")
    weight = check_array_like(weight, "weight")
    init_score = check_array_like(init_score, "init_score")

    if weight is not None and weight.ndim > 1:
        raise ValueError("weight must be 1-dimensional")

    # Only the plain string "auto" means "not specified". The isinstance guard
    # keeps array-like values (whose ``==`` compares elementwise) from being
    # evaluated in a boolean context.
    if isinstance(feature_name, str) and feature_name == "auto":
        feature_name = None
    if isinstance(categorical_feature, str) and categorical_feature == "auto":
        categorical_feature = None

    # If not multiple outputs, try to collect the chunks on same worker into one
    # to feed the data into LightGBM for training.
    op = ToLGBMDataset(
        data=data,
        label=label,
        reference=reference,
        weight=weight,
        group=group,
        init_score=init_score,
        feature_name=feature_name,
        categorical_feature=categorical_feature,
        params=params,
        free_raw_data=free_raw_data,
        position=position,
        gpu=data.op.gpu,
        _output_types=get_output_types(data),
    )
    return op()


# Alias of ``to_lgbm_dataset`` under the name ``Dataset``.
Dataset = to_lgbm_dataset
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ..utils import make_import_error_func
|
|
16
|
+
from .core import LGBMScikitLearnBase
|
|
17
|
+
|
|
18
|
+
# lightgbm is optional: fall back to ``None`` when it cannot be imported.
try:
    import lightgbm
except ImportError:
    lightgbm = None


if lightgbm:

    # Regressor combining the local base class with lightgbm's own regressor.
    class LGBMRegressor(LGBMScikitLearnBase, lightgbm.LGBMRegressor):
        _default_objective = "regression"

else:
    # Placeholder built by make_import_error_func when lightgbm is missing.
    LGBMRegressor = make_import_error_func("lightgbm")
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import inspect
|
|
16
|
+
|
|
17
|
+
import pytest
|
|
18
|
+
|
|
19
|
+
from ..callback import (
|
|
20
|
+
EarlyStoppingCallback,
|
|
21
|
+
LGBMTrainingCallback,
|
|
22
|
+
early_stopping,
|
|
23
|
+
reset_parameter,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from lightgbm.callback import _EarlyStoppingCallback
|
|
28
|
+
except ImportError:
|
|
29
|
+
try:
|
|
30
|
+
from lightgbm.callback import early_stopping as _EarlyStoppingCallback
|
|
31
|
+
except ImportError:
|
|
32
|
+
pytestmark = pytest.mark.skip("Need lightgbm to run the test")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_lgbm_training_callbacks():
    """Round-trip an early-stopping callback between remote and local forms
    and check ``has_custom_code`` for ``reset_parameter`` callbacks."""
    stopper = early_stopping(stopping_rounds=5)
    assert not stopper.has_custom_code()

    local_stopper = stopper.to_local()
    if not isinstance(_EarlyStoppingCallback, type):
        # The imported name is a function here: inspect the returned closure.
        assert local_stopper.__qualname__.startswith(
            _EarlyStoppingCallback.__name__
        )
        closure_vars = inspect.getclosurevars(local_stopper).nonlocals
        assert closure_vars["stopping_rounds"] == 5
    else:
        # The imported name is a class: the local callback is an instance of it.
        assert isinstance(local_stopper, _EarlyStoppingCallback)
        assert local_stopper.stopping_rounds == 5

    remote_stopper = LGBMTrainingCallback.from_local(local_stopper)
    assert isinstance(remote_stopper, EarlyStoppingCallback)
    assert remote_stopper.stopping_rounds == 5

    # A list of values is not custom code ...
    list_schedule_cb = reset_parameter(a=[0.5, 0.4, 0.1])
    assert not list_schedule_cb.has_custom_code()

    # ... while a user-supplied lambda is.
    lambda_schedule_cb = reset_parameter(a=lambda x: x * 0.1)
    assert lambda_schedule_cb.has_custom_code()
|
maxframe/learn/contrib/models.py
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import Type
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
16
18
|
from ...core import ENTITY_TYPE, OutputType
|
|
17
19
|
from ...core.operator import ObjectOperator, ObjectOperatorMixin
|
|
@@ -21,7 +23,30 @@ from ...serialization.serializables import (
|
|
|
21
23
|
FunctionField,
|
|
22
24
|
TupleField,
|
|
23
25
|
)
|
|
26
|
+
from ...udf import BuiltinFunction
|
|
24
27
|
from ...utils import find_objects, replace_objects
|
|
28
|
+
from ..core import Model, ModelData
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ModelWithEvalData(ModelData):
    """Model data that additionally keeps a dict of evaluation results."""

    __slots__ = ("_evals_result",)

    _evals_result: dict

    def __init__(self, *args, evals_result=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Use the caller's dict when given so updates are visible to the caller.
        if evals_result is None:
            evals_result = dict()
        self._evals_result = evals_result

    def execute(self, session=None, **kw):
        """Execute, then pull the evaluation results into ``_evals_result``
        when the op reports them and this object is the op's first output."""
        # The evals_result should be fetched when BoosterData.execute() is called.
        result = super().execute(session=session, **kw)
        is_first_output_with_evals = (
            self.op.has_evals_result and self.key == self.op.outputs[0].key
        )
        if is_first_output_with_evals:
            fetched = self.op.outputs[1].fetch(session=session)
            self._evals_result.update(fetched)
        return result
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ModelWithEval(Model):
    """Model subclass that adds no members of its own."""
|
|
25
50
|
|
|
26
51
|
|
|
27
52
|
class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
|
|
@@ -29,7 +54,7 @@ class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
|
|
|
29
54
|
|
|
30
55
|
data = AnyField("data")
|
|
31
56
|
|
|
32
|
-
def __call__(self, model_cls):
|
|
57
|
+
def __call__(self, model_cls: Type[ModelWithEval]):
|
|
33
58
|
self._output_types = [OutputType.object]
|
|
34
59
|
return self.new_tileable(None, object_class=model_cls)
|
|
35
60
|
|
|
@@ -48,14 +73,18 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
|
|
|
48
73
|
self._output_types = list(output_types)
|
|
49
74
|
super().__init__(**kwargs)
|
|
50
75
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
76
|
+
def has_custom_code(self) -> bool:
    """Return True unless ``self.func`` is a ``BuiltinFunction``."""
    func_is_builtin = isinstance(self.func, BuiltinFunction)
    return not func_is_builtin
|
|
78
|
+
|
|
79
|
+
@classmethod
|
|
80
|
+
def _set_inputs(cls, op: "ModelApplyChunk", inputs):
|
|
81
|
+
super()._set_inputs(op, inputs)
|
|
82
|
+
old_inputs = find_objects(op.args, ENTITY_TYPE) + find_objects(
|
|
83
|
+
op.kwargs, ENTITY_TYPE
|
|
55
84
|
)
|
|
56
|
-
mapping = {o: n for o, n in zip(old_inputs,
|
|
57
|
-
|
|
58
|
-
|
|
85
|
+
mapping = {o: n for o, n in zip(old_inputs, op._inputs[1:])}
|
|
86
|
+
op.args = replace_objects(op.args, mapping)
|
|
87
|
+
op.kwargs = replace_objects(op.kwargs, mapping)
|
|
59
88
|
|
|
60
89
|
@property
|
|
61
90
|
def output_limit(self) -> int:
|
|
@@ -72,6 +101,6 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
|
|
|
72
101
|
return self.new_tileables(inputs, kws=output_kws)
|
|
73
102
|
|
|
74
103
|
|
|
75
|
-
def to_remote_model(model, model_cls):
|
|
104
|
+
def to_remote_model(model, model_cls: Type[ModelWithEval]) -> ModelWithEval:
|
|
76
105
|
op = ModelDataSource(data=model)
|
|
77
106
|
return op(model_cls)
|
maxframe/learn/contrib/utils.py
CHANGED
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
import sys
|
|
16
16
|
|
|
17
|
+
from ...serialization.serializables import Serializable
|
|
18
|
+
|
|
17
19
|
|
|
18
20
|
def make_import_error_func(package_name):
|
|
19
21
|
def _func(*_, **__): # pragma: no cover
|
|
@@ -51,3 +53,56 @@ def config_mod_getattr(mod_dict, globals_):
|
|
|
51
53
|
"__warningregistry__": dict(),
|
|
52
54
|
}
|
|
53
55
|
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TrainingCallback(Serializable):
    """Serializable stand-in for a local training-callback class.

    Subclasses point ``_local_cls`` at the local class they mirror. The
    ``_local_to_remote`` registry read by the class methods is not defined
    here; it is looked up on ``cls``.
    """

    _local_cls = None

    @classmethod
    def _load_local_to_remote_mapping(cls, globals_dict):
        """Fill ``cls._local_to_remote`` from the classes in ``globals_dict``.

        Does nothing when the registry already has entries.
        """
        if cls._local_to_remote:
            return
        for candidate in globals_dict.values():
            if not isinstance(candidate, type):
                continue
            if issubclass(candidate, cls) and candidate._local_cls is not None:
                cls._local_to_remote[candidate._local_cls] = candidate

    @classmethod
    def from_local(cls, callback_obj):
        """Convert a local callback (or list / tuple of them) to remote form.

        Objects whose exact type is not registered are returned unchanged.
        """
        if isinstance(callback_obj, (list, tuple)):
            return [cls.from_local(item) for item in callback_obj]

        local_type = type(callback_obj)
        if local_type not in cls._local_to_remote:
            return callback_obj

        remote_cls = cls._local_to_remote[local_type]
        init_kw = {}
        for field_name in remote_cls._FIELDS:
            # attributes missing on the local object are simply left out
            try:
                field_value = getattr(callback_obj, field_name)
            except AttributeError:
                continue
            init_kw[field_name] = field_value
        return remote_cls(**init_kw)

    def has_custom_code(self) -> bool:
        """Base implementation: always False."""
        return False

    @classmethod
    def remote_to_local(cls, remote_obj):
        """Convert a remote callback (or list / tuple of them) to local form.

        Objects that are not ``TrainingCallback`` instances are returned as-is.
        """
        if isinstance(remote_obj, (list, tuple)):
            return [cls.remote_to_local(item) for item in remote_obj]
        if isinstance(remote_obj, TrainingCallback):
            return remote_obj.to_local()
        return remote_obj

    def _extract_kw(self) -> dict:
        """Collect the fields of this object whose value is not ``None``."""
        extracted = {}
        for field_name in type(self)._FIELDS:
            field_value = getattr(self, field_name, None)
            if field_value is None:
                continue
            extracted[field_name] = field_value
        return extracted

    def to_local(self):
        """Instantiate ``_local_cls`` with the non-``None`` field values."""
        local_factory = type(self)._local_cls
        return local_factory(**self._extract_kw())

    def __call__(self, *args, **kwargs):
        """Build the local callback and invoke it with the given arguments."""
        local_callback = self.to_local()
        return local_callback(*args, **kwargs)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Callable, Optional, Sequence, Union
|
|
16
|
+
|
|
17
|
+
from ....serialization.serializables import (
|
|
18
|
+
AnyField,
|
|
19
|
+
BoolField,
|
|
20
|
+
Float32Field,
|
|
21
|
+
Int32Field,
|
|
22
|
+
StringField,
|
|
23
|
+
)
|
|
24
|
+
from ....udf import BuiltinFunction
|
|
25
|
+
from ..utils import TrainingCallback
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from xgboost.callback import EarlyStopping as _EarlyStopping
|
|
29
|
+
from xgboost.callback import LearningRateScheduler as _LearningRateScheduler
|
|
30
|
+
except ImportError:
|
|
31
|
+
_LearningRateScheduler = _EarlyStopping = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class XGBTrainingCallback(TrainingCallback):
    """Base class of the XGBoost callback wrappers defined in this module."""

    # Registry from local callback class to its wrapper class; subclasses that
    # do not override it share this dict.
    _local_to_remote = {}

    @classmethod
    def from_local(cls, callback_obj):
        """Convert a local callback (or list / tuple of them) to its wrapper.

        The registry is always loaded through ``XGBTrainingCallback`` rather
        than ``cls``: loading is filtered by ``issubclass(..., cls)`` and is
        skipped once the registry is non-empty, so loading through a concrete
        subclass first would register that subclass alone and permanently
        hide the other wrappers.
        """
        XGBTrainingCallback._load_local_to_remote_mapping(globals())
        return super().from_local(callback_obj)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LearningRateScheduler(XGBTrainingCallback):
    """Wrapper whose local counterpart is ``_LearningRateScheduler``."""

    _local_cls = _LearningRateScheduler

    learning_rates = AnyField("learning_rates", default=None)

    def __init__(
        self, learning_rates: Union[Callable[[int], float], Sequence[float]], **kw
    ) -> None:
        super().__init__(learning_rates=learning_rates, **kw)

    def has_custom_code(self) -> bool:
        """Return False when ``learning_rates`` is a tuple, list or
        ``BuiltinFunction``; True for anything else."""
        non_custom_kinds = (tuple, list, BuiltinFunction)
        if isinstance(self.learning_rates, non_custom_kinds):
            return False
        return True
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class EarlyStopping(XGBTrainingCallback):
    """Wrapper whose local counterpart is ``_EarlyStopping``."""

    _local_cls = _EarlyStopping

    rounds = Int32Field("rounds")
    metric_name = StringField("metric_name", default=None)
    data_name = StringField("data_name", default=None)
    maximize = BoolField("maximize", default=None)
    save_best = BoolField("save_best", default=None)
    min_delta = Float32Field("min_delta", default=None)

    def __init__(
        self,
        *,
        rounds: int,
        metric_name: Optional[str] = None,
        data_name: Optional[str] = None,
        maximize: Optional[bool] = None,
        save_best: Optional[bool] = False,
        min_delta: float = 0.0,
        **kw
    ) -> None:
        # Gather the declared fields, then forward them with any extra keywords.
        field_values = dict(
            rounds=rounds,
            metric_name=metric_name,
            data_name=data_name,
            maximize=maximize,
            save_best=save_best,
            min_delta=min_delta,
        )
        super().__init__(**field_values, **kw)
|
|
@@ -26,9 +26,7 @@ if not xgboost:
|
|
|
26
26
|
else:
|
|
27
27
|
from xgboost.sklearn import XGBClassifierBase
|
|
28
28
|
|
|
29
|
-
from .core import wrap_evaluation_matrices
|
|
30
29
|
from .predict import predict
|
|
31
|
-
from .train import train
|
|
32
30
|
|
|
33
31
|
class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
|
|
34
32
|
"""
|
|
@@ -43,6 +41,15 @@ else:
|
|
|
43
41
|
super().__init__(**kwargs)
|
|
44
42
|
self._set_model(xgb_model)
|
|
45
43
|
|
|
44
|
+
def get_xgb_params(self):
    """Return the parent's params with the objective set from ``n_classes_``.

    More than two classes selects ``multi:softprob`` and sets ``num_class``;
    otherwise ``binary:logistic`` is used.
    """
    params = super().get_xgb_params()
    if not self.n_classes_ > 2:
        params["objective"] = "binary:logistic"
        return params

    params["objective"] = "multi:softprob"
    params["num_class"] = self.n_classes_
    return params
|
|
52
|
+
|
|
46
53
|
def fit(
|
|
47
54
|
self,
|
|
48
55
|
X,
|
|
@@ -50,43 +57,32 @@ else:
|
|
|
50
57
|
sample_weight=None,
|
|
51
58
|
base_margin=None,
|
|
52
59
|
eval_set=None,
|
|
60
|
+
xgb_model=None,
|
|
53
61
|
sample_weight_eval_set=None,
|
|
54
62
|
base_margin_eval_set=None,
|
|
55
63
|
num_class=None,
|
|
56
64
|
**kw,
|
|
57
65
|
):
|
|
58
66
|
session = kw.pop("session", None)
|
|
59
|
-
run_kwargs = kw.pop("run_kwargs",
|
|
60
|
-
|
|
61
|
-
|
|
67
|
+
run_kwargs = kw.pop("run_kwargs", dict())
|
|
68
|
+
|
|
69
|
+
if num_class is not None:
|
|
70
|
+
self.n_classes_ = num_class
|
|
71
|
+
else:
|
|
72
|
+
t_labels = mt.unique(y).execute(session=session, **run_kwargs)
|
|
73
|
+
self.n_classes_ = t_labels.shape[0]
|
|
74
|
+
|
|
75
|
+
super().fit(
|
|
62
76
|
X,
|
|
63
77
|
y,
|
|
64
|
-
sample_weight,
|
|
65
|
-
base_margin,
|
|
66
|
-
eval_set,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
self._n_features_in = X.shape[1]
|
|
72
|
-
self.n_classes_ = num_class or 1
|
|
73
|
-
if self.n_classes_ > 2:
|
|
74
|
-
params["objective"] = "multi:softprob"
|
|
75
|
-
params["num_class"] = self.n_classes_
|
|
76
|
-
else:
|
|
77
|
-
params["objective"] = "binary:logistic"
|
|
78
|
-
self.evals_result_ = dict()
|
|
79
|
-
result = train(
|
|
80
|
-
params,
|
|
81
|
-
dtrain,
|
|
82
|
-
num_boost_round=self.get_num_boosting_rounds(),
|
|
83
|
-
evals=evals,
|
|
84
|
-
evals_result=self.evals_result_,
|
|
85
|
-
num_class=num_class,
|
|
86
|
-
session=session,
|
|
87
|
-
run_kwargs=run_kwargs,
|
|
78
|
+
sample_weight=sample_weight,
|
|
79
|
+
base_margin=base_margin,
|
|
80
|
+
eval_set=eval_set,
|
|
81
|
+
xgb_model=xgb_model,
|
|
82
|
+
sample_weight_eval_set=sample_weight_eval_set,
|
|
83
|
+
base_margin_eval_set=base_margin_eval_set,
|
|
84
|
+
**kw,
|
|
88
85
|
)
|
|
89
|
-
self._Booster = result
|
|
90
86
|
return self
|
|
91
87
|
|
|
92
88
|
def predict(self, data, **kw):
|