maxframe 1.3.0__cp311-cp311-macosx_10_9_universal2.whl → 2.0.0b1__cp311-cp311-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +62 -9
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +128 -3
- maxframe/dataframe/reduction/core.py +20 -6
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/models/dashscope.py +34 -0
- maxframe/learn/contrib/llm/models/managed.py +15 -0
- maxframe/learn/contrib/llm/multi_modal.py +92 -0
- maxframe/learn/contrib/llm/text.py +21 -5
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +13 -8
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.0.dist-info/RECORD +0 -705
- {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -21,13 +21,15 @@ from odps import types as odps_types
|
|
|
21
21
|
from .... import dataframe as md
|
|
22
22
|
from .... import tensor as mt
|
|
23
23
|
from ....core import OutputType
|
|
24
|
-
from ....lib.dtypes_extension import dict_
|
|
24
|
+
from ....lib.dtypes_extension import ArrowDtype, dict_, list_
|
|
25
25
|
from ....utils import pd_release_version
|
|
26
26
|
from ..schema import (
|
|
27
27
|
arrow_schema_to_odps_schema,
|
|
28
28
|
build_dataframe_table_meta,
|
|
29
29
|
build_table_column_name,
|
|
30
30
|
odps_schema_to_arrow_schema,
|
|
31
|
+
odps_schema_to_pandas_dtypes,
|
|
32
|
+
pandas_dtypes_to_arrow_schema,
|
|
31
33
|
pandas_to_odps_schema,
|
|
32
34
|
pandas_types_to_arrow_schema,
|
|
33
35
|
)
|
|
@@ -42,6 +44,8 @@ def _wrap_maxframe_obj(obj, wrap="no"):
|
|
|
42
44
|
obj = md.Series(obj)
|
|
43
45
|
elif isinstance(obj, pd.Index):
|
|
44
46
|
obj = md.Index(obj)
|
|
47
|
+
elif isinstance(obj, np.ndarray):
|
|
48
|
+
obj = mt.tensor(obj)
|
|
45
49
|
else:
|
|
46
50
|
obj = mt.scalar(obj)
|
|
47
51
|
if wrap == "data":
|
|
@@ -191,6 +195,24 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
|
|
|
191
195
|
assert meta.pd_index_level_names == [None]
|
|
192
196
|
|
|
193
197
|
|
|
198
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
199
|
+
def test_pandas_to_odps_schema_tensor(wrap_obj):
|
|
200
|
+
data = np.array([1, 2, 3])
|
|
201
|
+
|
|
202
|
+
test_tensor = _wrap_maxframe_obj(data, wrap=wrap_obj)
|
|
203
|
+
if wrap_obj != "no":
|
|
204
|
+
test_tensor.op.data = None
|
|
205
|
+
|
|
206
|
+
schema, meta = pandas_to_odps_schema(test_tensor, unknown_as_string=True)
|
|
207
|
+
assert schema.columns[0].name == "_idx_0"
|
|
208
|
+
assert schema.columns[0].type.name == "bigint"
|
|
209
|
+
assert meta.type == OutputType.tensor
|
|
210
|
+
assert meta.table_column_names == []
|
|
211
|
+
assert meta.table_index_column_names == ["_idx_0"]
|
|
212
|
+
assert meta.pd_column_level_names == []
|
|
213
|
+
assert meta.pd_index_level_names == [None]
|
|
214
|
+
|
|
215
|
+
|
|
194
216
|
def test_odps_arrow_schema_conversion():
|
|
195
217
|
odps_schema = odps_types.OdpsSchema(
|
|
196
218
|
[
|
|
@@ -275,6 +297,95 @@ def test_odps_arrow_schema_conversion():
|
|
|
275
297
|
arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
|
|
276
298
|
|
|
277
299
|
|
|
300
|
+
def test_odps_pandas_schema_conversion():
|
|
301
|
+
odps_schema = odps_types.OdpsSchema(
|
|
302
|
+
[
|
|
303
|
+
odps_types.Column("col1", "string"),
|
|
304
|
+
odps_types.Column("col2", "binary"),
|
|
305
|
+
odps_types.Column("col3", "tinyint"),
|
|
306
|
+
odps_types.Column("col4", "smallint"),
|
|
307
|
+
odps_types.Column("col5", "int"),
|
|
308
|
+
odps_types.Column("col6", "bigint"),
|
|
309
|
+
odps_types.Column("col7", "boolean"),
|
|
310
|
+
odps_types.Column("col8", "float"),
|
|
311
|
+
odps_types.Column("col9", "double"),
|
|
312
|
+
# odps_types.Column("col10", "date"),
|
|
313
|
+
odps_types.Column("col11", "datetime"),
|
|
314
|
+
odps_types.Column("col12", "timestamp"),
|
|
315
|
+
# odps_types.Column("col13", "decimal(10, 2)"),
|
|
316
|
+
odps_types.Column("col14", "array<string>"),
|
|
317
|
+
odps_types.Column("col15", "map<string, bigint>"),
|
|
318
|
+
# odps_types.Column("col16", "struct<a1: string, a2: map<string, bigint>>"),
|
|
319
|
+
# odps_types.Column("col17", "CHAR(15)"),
|
|
320
|
+
# odps_types.Column("col18", "VARCHAR(15)"),
|
|
321
|
+
# odps_types.Column("col19", "decimal"),
|
|
322
|
+
]
|
|
323
|
+
)
|
|
324
|
+
pd_dtypes = odps_schema_to_pandas_dtypes(odps_schema)
|
|
325
|
+
pd.testing.assert_series_equal(
|
|
326
|
+
pd_dtypes,
|
|
327
|
+
pd.Series(
|
|
328
|
+
[
|
|
329
|
+
np.dtype("O"), # string
|
|
330
|
+
np.dtype("O"), # binary
|
|
331
|
+
np.dtype(np.int8),
|
|
332
|
+
np.dtype(np.int16),
|
|
333
|
+
np.dtype(np.int32),
|
|
334
|
+
np.dtype(np.int64),
|
|
335
|
+
np.dtype(np.bool_),
|
|
336
|
+
np.dtype(np.float32),
|
|
337
|
+
np.dtype(np.float64),
|
|
338
|
+
np.dtype(
|
|
339
|
+
"datetime64[ms]" if pd_release_version[0] >= 2 else "datetime64[ns]"
|
|
340
|
+
),
|
|
341
|
+
np.dtype("datetime64[ns]"),
|
|
342
|
+
ArrowDtype(pa.list_(pa.string())),
|
|
343
|
+
ArrowDtype(pa.map_(pa.string(), pa.int64())),
|
|
344
|
+
],
|
|
345
|
+
index=[c.name for c in odps_schema.columns],
|
|
346
|
+
),
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
expected_odps_schema = odps_types.OdpsSchema(
|
|
350
|
+
[
|
|
351
|
+
odps_types.Column("col1", "string"),
|
|
352
|
+
odps_types.Column("col2", "string"), # binary
|
|
353
|
+
odps_types.Column("col3", "tinyint"),
|
|
354
|
+
odps_types.Column("col4", "smallint"),
|
|
355
|
+
odps_types.Column("col5", "int"),
|
|
356
|
+
odps_types.Column("col6", "bigint"),
|
|
357
|
+
odps_types.Column("col7", "boolean"),
|
|
358
|
+
odps_types.Column("col8", "float"),
|
|
359
|
+
odps_types.Column("col9", "double"),
|
|
360
|
+
# odps_types.Column("col10", "date"),
|
|
361
|
+
odps_types.Column(
|
|
362
|
+
"col11", "datetime" if pd_release_version[0] >= 2 else "timestamp"
|
|
363
|
+
),
|
|
364
|
+
odps_types.Column("col12", "timestamp"),
|
|
365
|
+
# odps_types.Column("col13", "decimal(10, 2)"),
|
|
366
|
+
odps_types.Column("col14", "array<string>"),
|
|
367
|
+
odps_types.Column("col15", "map<string, bigint>"),
|
|
368
|
+
# odps_types.Column("col16", "struct<a1: string, a2: map<string, bigint>>"),
|
|
369
|
+
# odps_types.Column("col17", "string"),
|
|
370
|
+
# odps_types.Column("col18", "string"),
|
|
371
|
+
# odps_types.Column("col19", "decimal(38, 18)"),
|
|
372
|
+
]
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
odps_schema2 = arrow_schema_to_odps_schema(
|
|
376
|
+
pandas_dtypes_to_arrow_schema(pd_dtypes, unknown_as_string=True)
|
|
377
|
+
)
|
|
378
|
+
assert [c.name for c in expected_odps_schema.columns] == [
|
|
379
|
+
c.name for c in odps_schema2.columns
|
|
380
|
+
]
|
|
381
|
+
assert [c.type for c in expected_odps_schema.columns] == [
|
|
382
|
+
c.type for c in odps_schema2.columns
|
|
383
|
+
]
|
|
384
|
+
|
|
385
|
+
with pytest.raises(TypeError):
|
|
386
|
+
arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
|
|
387
|
+
|
|
388
|
+
|
|
278
389
|
def test_build_column_name():
|
|
279
390
|
records = dict()
|
|
280
391
|
assert build_table_column_name(0, "a" * 127, records) == "a" * 127
|
|
@@ -345,8 +456,10 @@ def test_pandas_types_to_arrow_schema():
|
|
|
345
456
|
{
|
|
346
457
|
"int8": pd.Series([], dtype=np.int8),
|
|
347
458
|
"map": pd.Series([], dtype=dict_(pa.string(), pa.string())),
|
|
459
|
+
"list": pd.Series([], dtype=list_(pa.string())),
|
|
348
460
|
},
|
|
349
461
|
)
|
|
350
462
|
schema = pandas_types_to_arrow_schema(pd_data)
|
|
351
463
|
assert schema.field("int8").type == pa.int8()
|
|
352
464
|
assert schema.field("map").type == pa.map_(pa.string(), pa.string())
|
|
465
|
+
assert schema.field("list").type == pa.list_(pa.string())
|
|
@@ -14,15 +14,19 @@
|
|
|
14
14
|
|
|
15
15
|
import datetime
|
|
16
16
|
|
|
17
|
+
import mock
|
|
17
18
|
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
19
20
|
import pyarrow as pa
|
|
20
21
|
import pytest
|
|
21
22
|
from odps import ODPS
|
|
23
|
+
from odps.errors import TableModified
|
|
24
|
+
from odps.models import Table
|
|
22
25
|
|
|
23
26
|
from ....config import options
|
|
24
27
|
from ....tests.utils import flaky, tn
|
|
25
28
|
from ....utils import config_odps_default_options
|
|
29
|
+
from .. import TunnelTableIO
|
|
26
30
|
from ..tableio import ODPSTableIO
|
|
27
31
|
|
|
28
32
|
|
|
@@ -161,3 +165,41 @@ def test_table_io_with_parts(switch_table_io):
|
|
|
161
165
|
pd.testing.assert_frame_equal(reader.read_all().to_pandas(), expected_data)
|
|
162
166
|
finally:
|
|
163
167
|
tb.drop()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_tunnel_table_io_with_modified():
|
|
171
|
+
config_odps_default_options()
|
|
172
|
+
|
|
173
|
+
o = ODPS.from_environments()
|
|
174
|
+
table_io = TunnelTableIO(o)
|
|
175
|
+
|
|
176
|
+
# test read and write tables with partition
|
|
177
|
+
parted_table_name = tn("test_tunnel_write_modified")
|
|
178
|
+
o.delete_table(parted_table_name, if_exists=True)
|
|
179
|
+
tb = o.create_table(
|
|
180
|
+
parted_table_name,
|
|
181
|
+
(",".join(f"{c} double" for c in "abcde"), "pt string"),
|
|
182
|
+
lifecycle=1,
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
raised = False
|
|
186
|
+
raw_open_reader = Table.open_reader
|
|
187
|
+
|
|
188
|
+
def _new_open_reader(self, *args, **kwargs):
|
|
189
|
+
nonlocal raised
|
|
190
|
+
if not raised:
|
|
191
|
+
raised = True
|
|
192
|
+
raise TableModified("Intentional error")
|
|
193
|
+
return raw_open_reader(self, *args, **kwargs)
|
|
194
|
+
|
|
195
|
+
try:
|
|
196
|
+
pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
|
|
197
|
+
with table_io.open_writer(parted_table_name, "pt=test") as writer:
|
|
198
|
+
writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
|
|
199
|
+
|
|
200
|
+
with mock.patch(
|
|
201
|
+
"odps.models.table.Table.open_reader", new=_new_open_reader
|
|
202
|
+
), table_io.open_reader(parted_table_name, "pt=test") as reader:
|
|
203
|
+
pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
|
|
204
|
+
finally:
|
|
205
|
+
tb.drop()
|
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import contextlib
|
|
16
|
+
|
|
15
17
|
import pytest
|
|
16
18
|
from odps import ODPS
|
|
17
19
|
|
|
18
|
-
from ....tests.utils import tn
|
|
20
|
+
from ....tests.utils import create_test_volume, tn
|
|
19
21
|
from ..volumeio import ODPSVolumeReader, ODPSVolumeWriter
|
|
20
22
|
|
|
21
23
|
|
|
@@ -24,59 +26,31 @@ def create_volume(request, oss_config):
|
|
|
24
26
|
test_vol_name = tn("test_vol_name_" + request.param)
|
|
25
27
|
odps_entry = ODPS.from_environments()
|
|
26
28
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
@contextlib.contextmanager
|
|
30
|
+
def create_parted_volume():
|
|
31
|
+
try:
|
|
32
|
+
odps_entry.delete_volume(test_vol_name)
|
|
33
|
+
except:
|
|
34
|
+
pass
|
|
35
|
+
try:
|
|
36
|
+
odps_entry.create_parted_volume(test_vol_name)
|
|
37
|
+
yield
|
|
38
|
+
finally:
|
|
39
|
+
try:
|
|
40
|
+
odps_entry.delete_volume(test_vol_name)
|
|
41
|
+
except BaseException:
|
|
42
|
+
pass
|
|
31
43
|
|
|
32
44
|
oss_test_dir_name = None
|
|
33
45
|
if request.param == "parted":
|
|
34
|
-
|
|
46
|
+
ctx = create_parted_volume()
|
|
35
47
|
else:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
pytest.skip("Need oss and its config to run this test")
|
|
39
|
-
(
|
|
40
|
-
oss_access_id,
|
|
41
|
-
oss_secret_access_key,
|
|
42
|
-
oss_bucket_name,
|
|
43
|
-
oss_endpoint,
|
|
44
|
-
) = oss_config.oss_config
|
|
45
|
-
|
|
46
|
-
if "test" in oss_endpoint:
|
|
47
|
-
# offline config
|
|
48
|
-
test_location = "oss://%s:%s@%s/%s/%s" % (
|
|
49
|
-
oss_access_id,
|
|
50
|
-
oss_secret_access_key,
|
|
51
|
-
oss_endpoint,
|
|
52
|
-
oss_bucket_name,
|
|
53
|
-
oss_test_dir_name,
|
|
54
|
-
)
|
|
55
|
-
rolearn = None
|
|
56
|
-
else:
|
|
57
|
-
# online config
|
|
58
|
-
endpoint_parts = oss_endpoint.split(".", 1)
|
|
59
|
-
if "-internal" not in endpoint_parts[0]:
|
|
60
|
-
endpoint_parts[0] += "-internal"
|
|
61
|
-
test_location = "oss://%s/%s/%s" % (
|
|
62
|
-
".".join(endpoint_parts),
|
|
63
|
-
oss_bucket_name,
|
|
64
|
-
oss_test_dir_name,
|
|
65
|
-
)
|
|
66
|
-
rolearn = oss_config.oss_rolearn
|
|
67
|
-
|
|
68
|
-
oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
|
|
69
|
-
odps_entry.create_external_volume(
|
|
70
|
-
test_vol_name, location=test_location, rolearn=rolearn
|
|
71
|
-
)
|
|
48
|
+
ctx = create_test_volume(test_vol_name, oss_config)
|
|
49
|
+
|
|
72
50
|
try:
|
|
73
|
-
|
|
51
|
+
with ctx:
|
|
52
|
+
yield test_vol_name
|
|
74
53
|
finally:
|
|
75
|
-
try:
|
|
76
|
-
odps_entry.delete_volume(test_vol_name)
|
|
77
|
-
except BaseException:
|
|
78
|
-
pass
|
|
79
|
-
|
|
80
54
|
if oss_test_dir_name is not None:
|
|
81
55
|
import oss2
|
|
82
56
|
|
maxframe/learn/__init__.py
CHANGED
|
@@ -12,6 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from . import graph,
|
|
15
|
+
from . import graph, lightgbm, llm, pytorch
|
|
16
16
|
|
|
17
|
-
del graph,
|
|
17
|
+
del graph, lightgbm, llm, pytorch
|
|
@@ -19,8 +19,9 @@ from maxframe import opcodes
|
|
|
19
19
|
|
|
20
20
|
from ....core import OutputType
|
|
21
21
|
from ....dataframe.operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
-
from ....dataframe.utils import
|
|
22
|
+
from ....dataframe.utils import parse_index
|
|
23
23
|
from ....serialization.serializables import Int32Field, StringField
|
|
24
|
+
from ....utils import make_dtypes
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
class DataFrameConnectedComponentsOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ._predict import predict
|
|
16
|
+
from ._train import train
|
|
17
|
+
from .dataset import Dataset
|
|
18
|
+
|
|
19
|
+
# isort: off
|
|
20
|
+
from ..utils import config_mod_getattr as _config_mod_getattr
|
|
21
|
+
|
|
22
|
+
_config_mod_getattr(
|
|
23
|
+
{
|
|
24
|
+
"LGBMClassifier": ".classifier.LGBMClassifier",
|
|
25
|
+
"LGBMRegressor": ".regressor.LGBMRegressor",
|
|
26
|
+
# "LGBMRanker": ".ranker.LGBMRanker",
|
|
27
|
+
},
|
|
28
|
+
globals(),
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
del _config_mod_getattr
|
|
32
|
+
|
|
33
|
+
__all__ = ["Dataset", "LGBMClassifier", "LGBMRegressor", "predict", "train"]
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict, List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import OutputType
|
|
21
|
+
from ....core.operator import Operator, TileableOperatorMixin
|
|
22
|
+
from ....serialization.serializables import BoolField, DictField, Int32Field, KeyField
|
|
23
|
+
from ....tensor.core import TensorOrder
|
|
24
|
+
from ....typing_ import EntityType
|
|
25
|
+
from ..models import to_remote_model
|
|
26
|
+
from .dataset import check_data
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class LGBMPredict(Operator, TileableOperatorMixin):
|
|
30
|
+
_op_type_ = opcodes.LGBM_PREDICT
|
|
31
|
+
_op_module_ = "learn.contrib.lightgbm"
|
|
32
|
+
output_dtype = np.dtype(np.float64)
|
|
33
|
+
|
|
34
|
+
data = KeyField("data", default=None)
|
|
35
|
+
booster = KeyField("booster", default=None)
|
|
36
|
+
start_iteration = Int32Field("start_iteration", default=0)
|
|
37
|
+
num_iterations = Int32Field("num_iterations", default=None)
|
|
38
|
+
raw_score = BoolField("raw_score", default=False)
|
|
39
|
+
pred_leaf = BoolField("pred_leaf", default=False)
|
|
40
|
+
pred_contrib = BoolField("pred_contrib", default=False)
|
|
41
|
+
validate_features = BoolField("validate_features", default=False)
|
|
42
|
+
kwds = DictField("kwds", default=None)
|
|
43
|
+
|
|
44
|
+
def __init__(self, output_types=None, **kw):
|
|
45
|
+
super().__init__(_output_types=output_types, **kw)
|
|
46
|
+
|
|
47
|
+
def has_custom_code(self) -> bool:
|
|
48
|
+
return True
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def output_limit(self) -> int:
|
|
52
|
+
return 1 + self.pred_leaf + self.pred_contrib
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def _set_inputs(cls, op: "LGBMPredict", inputs: List[EntityType]):
|
|
56
|
+
super()._set_inputs(op, inputs)
|
|
57
|
+
it = iter(inputs)
|
|
58
|
+
op.data = next(it)
|
|
59
|
+
op.booster = next(it)
|
|
60
|
+
|
|
61
|
+
def __call__(self):
|
|
62
|
+
num_class = getattr(self.booster.op, "num_class", None)
|
|
63
|
+
|
|
64
|
+
result_kw: Dict[str, Any] = {
|
|
65
|
+
"dtype": self.output_dtype,
|
|
66
|
+
"order": TensorOrder.C_ORDER,
|
|
67
|
+
}
|
|
68
|
+
kws = [result_kw]
|
|
69
|
+
|
|
70
|
+
if num_class is not None:
|
|
71
|
+
num_class = int(num_class)
|
|
72
|
+
if num_class is not None and num_class > 2:
|
|
73
|
+
result_kw["shape"] = (self.data.shape[0], num_class)
|
|
74
|
+
else:
|
|
75
|
+
result_kw["shape"] = (self.data.shape[0],)
|
|
76
|
+
|
|
77
|
+
if self.pred_leaf:
|
|
78
|
+
kws.append(
|
|
79
|
+
{
|
|
80
|
+
"shape": (np.nan, np.nan),
|
|
81
|
+
"dtype": np.dtype(np.float_),
|
|
82
|
+
"order": TensorOrder.C_ORDER,
|
|
83
|
+
}
|
|
84
|
+
)
|
|
85
|
+
if self.pred_contrib:
|
|
86
|
+
kws.append(
|
|
87
|
+
{
|
|
88
|
+
"shape": (np.nan, np.nan),
|
|
89
|
+
"dtype": np.dtype(np.float_),
|
|
90
|
+
"order": TensorOrder.C_ORDER,
|
|
91
|
+
}
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
return self.new_tileables([self.data, self.booster], kws=kws)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def predict(
|
|
98
|
+
booster,
|
|
99
|
+
data,
|
|
100
|
+
raw_score: bool = False,
|
|
101
|
+
start_iteration: int = 0,
|
|
102
|
+
num_iteration: int = None,
|
|
103
|
+
pred_leaf: bool = False,
|
|
104
|
+
pred_contrib: bool = False,
|
|
105
|
+
validate_features: bool = False,
|
|
106
|
+
**kwargs,
|
|
107
|
+
):
|
|
108
|
+
import lightgbm
|
|
109
|
+
|
|
110
|
+
from .core import Booster, BoosterData
|
|
111
|
+
|
|
112
|
+
if not isinstance(booster, (Booster, BoosterData, lightgbm.Booster)):
|
|
113
|
+
raise TypeError(
|
|
114
|
+
f"model has to be a lightgbm.Booster, got {type(booster)} instead"
|
|
115
|
+
)
|
|
116
|
+
elif isinstance(booster, lightgbm.Booster):
|
|
117
|
+
booster = to_remote_model(booster, model_cls=Booster)
|
|
118
|
+
proba = kwargs.pop("proba", False)
|
|
119
|
+
|
|
120
|
+
data = check_data(data)
|
|
121
|
+
|
|
122
|
+
op = LGBMPredict(
|
|
123
|
+
data=data,
|
|
124
|
+
booster=booster,
|
|
125
|
+
output_types=[OutputType.tensor],
|
|
126
|
+
proba=proba,
|
|
127
|
+
raw_score=raw_score,
|
|
128
|
+
start_iteration=start_iteration,
|
|
129
|
+
num_iteration=num_iteration,
|
|
130
|
+
pred_leaf=pred_leaf,
|
|
131
|
+
pred_contrib=pred_contrib,
|
|
132
|
+
validate_features=validate_features,
|
|
133
|
+
kwds=kwargs,
|
|
134
|
+
)
|
|
135
|
+
results = op()
|
|
136
|
+
if len(results) == 1:
|
|
137
|
+
return results[0]
|
|
138
|
+
return results
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import itertools
|
|
16
|
+
import logging
|
|
17
|
+
from typing import List
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import OutputType
|
|
21
|
+
from ....core.operator import ObjectOperator, ObjectOperatorMixin
|
|
22
|
+
from ....serialization.serializables import (
|
|
23
|
+
AnyField,
|
|
24
|
+
BoolField,
|
|
25
|
+
DictField,
|
|
26
|
+
FieldTypes,
|
|
27
|
+
FunctionField,
|
|
28
|
+
Int32Field,
|
|
29
|
+
KeyField,
|
|
30
|
+
ListField,
|
|
31
|
+
StringField,
|
|
32
|
+
)
|
|
33
|
+
from ....typing_ import EntityType
|
|
34
|
+
from ..models import to_remote_model
|
|
35
|
+
from ..utils import TrainingCallback
|
|
36
|
+
from .core import Booster, BoosterData, LGBMScikitLearnBase
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LGBMTrain(ObjectOperator, ObjectOperatorMixin):
|
|
42
|
+
_op_type_ = opcodes.LGBM_TRAIN
|
|
43
|
+
_op_module_ = "learn.contrib.lightgbm"
|
|
44
|
+
|
|
45
|
+
params = DictField("params", key_type=FieldTypes.string, default_factory=dict)
|
|
46
|
+
train_set = KeyField("train_set", default=None)
|
|
47
|
+
num_boost_round = Int32Field("num_boost_round", default=None)
|
|
48
|
+
valid_sets = ListField("valid_sets", FieldTypes.key, default_factory=list)
|
|
49
|
+
valid_names = ListField("valid_names", FieldTypes.string, default_factory=list)
|
|
50
|
+
feval = FunctionField("feval", default=None)
|
|
51
|
+
init_model = KeyField("init_model", default=None)
|
|
52
|
+
feature_name = AnyField("feature_name", default=None)
|
|
53
|
+
categorical_feature = AnyField("categorical_feature", default=None)
|
|
54
|
+
keep_training_booster = BoolField("keep_training_booster", default=False)
|
|
55
|
+
callbacks = ListField(
|
|
56
|
+
"callbacks",
|
|
57
|
+
field_type=FunctionField.field_type,
|
|
58
|
+
default=None,
|
|
59
|
+
on_serialize=TrainingCallback.from_local,
|
|
60
|
+
)
|
|
61
|
+
tree_learner = StringField("tree_learner", default=None)
|
|
62
|
+
timeout = Int32Field("timeout", default=None)
|
|
63
|
+
# indicating shape of the predicted data of the model
|
|
64
|
+
num_class = Int32Field("num_class", default=None)
|
|
65
|
+
|
|
66
|
+
def __init__(self, gpu=None, **kw):
|
|
67
|
+
super().__init__(gpu=gpu, **kw)
|
|
68
|
+
if self.output_types is None:
|
|
69
|
+
self.output_types = [OutputType.object]
|
|
70
|
+
if self.has_evals_result:
|
|
71
|
+
self.output_types.append(OutputType.object)
|
|
72
|
+
|
|
73
|
+
@classmethod
|
|
74
|
+
def _set_inputs(cls, op: "LGBMTrain", inputs: List[EntityType]):
|
|
75
|
+
super()._set_inputs(op, inputs)
|
|
76
|
+
input_it = iter(op.inputs)
|
|
77
|
+
op.train_set = next(input_it)
|
|
78
|
+
op.valid_sets = list(itertools.islice(input_it, len(op.valid_sets or [])))
|
|
79
|
+
if op.init_model is not None:
|
|
80
|
+
op.init_model = next(input_it)
|
|
81
|
+
|
|
82
|
+
def __call__(self, evals_result):
|
|
83
|
+
inputs = [self.train_set]
|
|
84
|
+
if self.has_evals_result:
|
|
85
|
+
inputs.extend(self.valid_sets)
|
|
86
|
+
if self.init_model is not None:
|
|
87
|
+
inputs.append(self.init_model)
|
|
88
|
+
kws = [{"object_class": Booster}, {}]
|
|
89
|
+
return self.new_tileables(inputs, kws=kws, evals_result=evals_result)[0]
|
|
90
|
+
|
|
91
|
+
@property
|
|
92
|
+
def output_limit(self):
|
|
93
|
+
return 2 if self.has_evals_result else 1
|
|
94
|
+
|
|
95
|
+
def has_custom_code(self) -> bool:
|
|
96
|
+
if not self.callbacks:
|
|
97
|
+
return False
|
|
98
|
+
return any(
|
|
99
|
+
not isinstance(cb, TrainingCallback) or cb.has_custom_code()
|
|
100
|
+
for cb in self.callbacks
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
@property
|
|
104
|
+
def has_evals_result(self) -> bool:
|
|
105
|
+
return bool(self.valid_sets)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _get_lgbm_booster(init_model):
|
|
109
|
+
import lightgbm
|
|
110
|
+
|
|
111
|
+
if isinstance(init_model, (LGBMScikitLearnBase, lightgbm.LGBMModel)):
|
|
112
|
+
init_model = init_model.booster_
|
|
113
|
+
|
|
114
|
+
if isinstance(init_model, (Booster, BoosterData)):
|
|
115
|
+
return init_model
|
|
116
|
+
elif isinstance(init_model, lightgbm.Booster):
|
|
117
|
+
return to_remote_model(init_model, model_cls=Booster)
|
|
118
|
+
raise ValueError(f"Cannot use {type(init_model)} as init_model")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def train(
|
|
122
|
+
params,
|
|
123
|
+
train_set,
|
|
124
|
+
num_boost_round=100,
|
|
125
|
+
valid_sets=None,
|
|
126
|
+
valid_names=None,
|
|
127
|
+
feval=None,
|
|
128
|
+
init_model=None,
|
|
129
|
+
keep_training_booster=False,
|
|
130
|
+
callbacks=None,
|
|
131
|
+
num_class=2,
|
|
132
|
+
evals_result=None,
|
|
133
|
+
**kwargs,
|
|
134
|
+
):
|
|
135
|
+
evals_result = evals_result if evals_result is not None else dict()
|
|
136
|
+
timeout = kwargs.pop("timeout", 120)
|
|
137
|
+
session = kwargs.pop("session", None)
|
|
138
|
+
run_kwargs = kwargs.pop("run_kwargs", dict())
|
|
139
|
+
if valid_sets and valid_names:
|
|
140
|
+
if not isinstance(valid_names, list):
|
|
141
|
+
raise TypeError("valid_names must be a list of strings")
|
|
142
|
+
if len(valid_names) != len(valid_sets):
|
|
143
|
+
raise ValueError("size of valid_names must match size of valid_sets")
|
|
144
|
+
if init_model is not None:
|
|
145
|
+
init_model = _get_lgbm_booster(init_model)
|
|
146
|
+
data = LGBMTrain(
|
|
147
|
+
params=params,
|
|
148
|
+
train_set=train_set,
|
|
149
|
+
num_boost_round=num_boost_round,
|
|
150
|
+
valid_sets=valid_sets,
|
|
151
|
+
valid_names=valid_names,
|
|
152
|
+
feval=feval,
|
|
153
|
+
init_model=init_model,
|
|
154
|
+
keep_training_booster=keep_training_booster,
|
|
155
|
+
callbacks=callbacks,
|
|
156
|
+
num_class=num_class,
|
|
157
|
+
evals_result=evals_result,
|
|
158
|
+
timeout=timeout,
|
|
159
|
+
**kwargs,
|
|
160
|
+
)(evals_result)
|
|
161
|
+
if valid_sets:
|
|
162
|
+
data.execute(session=session, **run_kwargs)
|
|
163
|
+
return data
|