maxframe 1.3.0__cp311-cp311-win32.whl → 2.0.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +62 -9
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +128 -3
- maxframe/dataframe/reduction/core.py +20 -6
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/models/dashscope.py +34 -0
- maxframe/learn/contrib/llm/models/managed.py +15 -0
- maxframe/learn/contrib/llm/multi_modal.py +92 -0
- maxframe/learn/contrib/llm/text.py +21 -5
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +13 -8
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
- maxframe-2.0.0.dist-info/RECORD +939 -0
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.0.dist-info/RECORD +0 -705
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -34,6 +34,98 @@ def generate(
|
|
|
34
34
|
prompt_template: Dict[str, Any],
|
|
35
35
|
params: Dict[str, Any] = None,
|
|
36
36
|
):
|
|
37
|
+
"""
|
|
38
|
+
Generate text with a multi-modal LLM based on the given data and prompt template.
|
|
39
|
+
|
|
40
|
+
Parameters
|
|
41
|
+
----------
|
|
42
|
+
data : DataFrame or Series
|
|
43
|
+
Input data used for generation. Can be a maxframe DataFrame or Series that contains the text to be processed.
|
|
44
|
+
model : MultiModalLLM
|
|
45
|
+
Language model instance that supports **MultiModal** inputs, used for text generation.
|
|
46
|
+
prompt_template : List[Dict[str, List[Dict[str, str]]]]
|
|
47
|
+
List of messages with column names as placeholders. Each message contains a role and content. Content is a list of dicts; each dict contains a text or an image entry, whose value can reference column data from the input.
|
|
48
|
+
|
|
49
|
+
Here is an example of prompt template.
|
|
50
|
+
|
|
51
|
+
.. code-block:: python
|
|
52
|
+
|
|
53
|
+
[
|
|
54
|
+
{
|
|
55
|
+
"role": "<role>", # e.g. "user" or "assistant"
|
|
56
|
+
"content": [
|
|
57
|
+
{
|
|
58
|
+
# At least one of these fields is required
|
|
59
|
+
"image": "<image_data_url>", # optional
|
|
60
|
+
"text": "<prompt_text_template>" # optional
|
|
61
|
+
},
|
|
62
|
+
...
|
|
63
|
+
]
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
Where:
|
|
68
|
+
|
|
69
|
+
- ``text`` can be a Python format string using column names from input data as parameters (e.g. ``"{column_name}"``)
|
|
70
|
+
- ``image`` should be a data URL string following the `RFC2397 <https://en.wikipedia.org/wiki/Data_URI_scheme>`_ standard, in the following format:
|
|
71
|
+
|
|
72
|
+
.. code-block:: none
|
|
73
|
+
|
|
74
|
+
data:<mime_type>[;base64],<column_name>
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
params : Dict[str, Any], optional
|
|
78
|
+
Additional parameters for generation configuration, by default None.
|
|
79
|
+
Can include settings like temperature, max_tokens, etc.
|
|
80
|
+
|
|
81
|
+
Returns
|
|
82
|
+
-------
|
|
83
|
+
DataFrame
|
|
84
|
+
Raw response of the generated text and its success status. If the success status is False, the generated text will contain the
|
|
85
|
+
error message.
|
|
86
|
+
|
|
87
|
+
Notes
|
|
88
|
+
-----
|
|
89
|
+
- The ``api_key_resource`` parameter should reference a text file resource in MaxCompute that contains only your DashScope API key.
|
|
90
|
+
|
|
91
|
+
- Using DashScope services requires enabling public network access for your MaxCompute project. This can be configured through the MaxCompute console by `enabling the Internet access feature <https://help.aliyun.com/zh/maxcompute/user-guide/network-connection-process>`_ for your project. Without this configuration, the API calls to DashScope will fail due to network connectivity issues.
|
|
92
|
+
|
|
93
|
+
Examples
|
|
94
|
+
--------
|
|
95
|
+
You can initialize a DashScope multi-modal model (such as qwen-vl-max) by providing a model name and an ``api_key_resource``.
|
|
96
|
+
The ``api_key_resource`` is a MaxCompute resource name that points to a text file containing a `DashScope <https://dashscope.aliyun.com/>`_ API key.
|
|
97
|
+
|
|
98
|
+
>>> from maxframe.learn.contrib.llm.models.dashscope import DashScopeMultiModalLLM
|
|
99
|
+
>>> import maxframe.dataframe as md
|
|
100
|
+
>>>
|
|
101
|
+
>>> model = DashScopeMultiModalLLM(
|
|
102
|
+
... name="qwen-vl-max",
|
|
103
|
+
... api_key_resource="<api-key-resource-name>"
|
|
104
|
+
... )
|
|
105
|
+
|
|
106
|
+
We use the data URL scheme to provide multi-modal input in the prompt template. Here is an example that fills in the image from a table.
|
|
107
|
+
|
|
108
|
+
Assuming you have a MaxCompute table with two columns: ``image_id`` (as the index) and ``encoded_image_data_base64`` (containing Base64 encoded image data),
|
|
109
|
+
you can construct a prompt message template as follows:
|
|
110
|
+
|
|
111
|
+
>>> df = md.read_odps_table("image_content", index_col="image_id")
|
|
112
|
+
|
|
113
|
+
>>> prompt_template = [
|
|
114
|
+
... {
|
|
115
|
+
... "role": "user",
|
|
116
|
+
... "content": [
|
|
117
|
+
... {
|
|
118
|
+
... "image": "_image_data_base64",
|
|
119
|
+
... },
|
|
120
|
+
... {
|
|
121
|
+
... "text": "Analyze this image in detail",
|
|
122
|
+
... },
|
|
123
|
+
... ],
|
|
124
|
+
... },
|
|
125
|
+
... ]
|
|
126
|
+
>>> result = model.generate(df, prompt_template)
|
|
127
|
+
>>> result.execute()
|
|
128
|
+
"""
|
|
37
129
|
if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
|
|
38
130
|
raise ValueError("data must be a maxframe dataframe or series object")
|
|
39
131
|
if not isinstance(model, MultiModalLLM):
|
|
@@ -145,16 +145,16 @@ def generate(
|
|
|
145
145
|
>>> import maxframe.dataframe as md
|
|
146
146
|
>>>
|
|
147
147
|
>>> # Initialize the model
|
|
148
|
-
>>> llm = ManagedTextLLM(name="Qwen2.5-
|
|
148
|
+
>>> llm = ManagedTextLLM(name="Qwen2.5-0.5B-instruct")
|
|
149
149
|
>>>
|
|
150
150
|
>>> # Prepare prompt template
|
|
151
151
|
>>> messages = [
|
|
152
152
|
... {
|
|
153
153
|
... "role": "user",
|
|
154
|
-
... "content": "{query}",
|
|
154
|
+
... "content": "Help answer following question: {query}",
|
|
155
155
|
... },
|
|
156
156
|
... ]
|
|
157
|
-
|
|
157
|
+
|
|
158
158
|
>>> # Create sample data
|
|
159
159
|
>>> df = md.DataFrame({"query": ["What is machine learning?"]})
|
|
160
160
|
>>>
|
|
@@ -177,7 +177,7 @@ def summary(series, model: TextLLM, index=None):
|
|
|
177
177
|
|
|
178
178
|
Parameters
|
|
179
179
|
----------
|
|
180
|
-
series :
|
|
180
|
+
series : Series
|
|
181
181
|
A maxframe Series containing text data to be summarized.
|
|
182
182
|
Each element should be a text string.
|
|
183
183
|
model : TextLLM
|
|
@@ -189,6 +189,11 @@ def summary(series, model: TextLLM, index=None):
|
|
|
189
189
|
-------
|
|
190
190
|
maxframe.Series
|
|
191
191
|
A maxframe Series containing the generated summaries and success status.
|
|
192
|
+
|
|
193
|
+
Notes
|
|
194
|
+
-----
|
|
195
|
+
**Preview:** This API is in preview state and may be unstable.
|
|
196
|
+
The interface may change in future releases.
|
|
192
197
|
"""
|
|
193
198
|
if not isinstance(series, Series):
|
|
194
199
|
raise ValueError("series must be a maxframe series object")
|
|
@@ -208,7 +213,7 @@ def translate(
|
|
|
208
213
|
Parameters
|
|
209
214
|
----------
|
|
210
215
|
series : Series
|
|
211
|
-
A maxframe Series containing text data to
|
|
216
|
+
A maxframe Series containing text data to translate.
|
|
212
217
|
Each element should be a text string.
|
|
213
218
|
model : TextLLM
|
|
214
219
|
Language model instance used for text translation.
|
|
@@ -223,6 +228,12 @@ def translate(
|
|
|
223
228
|
-------
|
|
224
229
|
maxframe.Series
|
|
225
230
|
A maxframe Series containing the generated translation and success status.
|
|
231
|
+
|
|
232
|
+
Notes
|
|
233
|
+
-----
|
|
234
|
+
**Preview:** This API is in preview state and may be unstable.
|
|
235
|
+
The interface may change in future releases.
|
|
236
|
+
|
|
226
237
|
"""
|
|
227
238
|
if not isinstance(series, Series):
|
|
228
239
|
raise ValueError("series must be a maxframe series object")
|
|
@@ -268,6 +279,11 @@ def classify(
|
|
|
268
279
|
-------
|
|
269
280
|
maxframe.Series
|
|
270
281
|
A maxframe Series containing the generated classification results and success status.
|
|
282
|
+
|
|
283
|
+
Notes
|
|
284
|
+
-----
|
|
285
|
+
**Preview:** This API is in preview state and may be unstable.
|
|
286
|
+
The interface may change in future releases.
|
|
271
287
|
"""
|
|
272
288
|
if not isinstance(series, Series):
|
|
273
289
|
raise ValueError("series must be a maxframe series object")
|
maxframe/learn/contrib/models.py
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import Type
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
16
18
|
from ...core import ENTITY_TYPE, OutputType
|
|
17
19
|
from ...core.operator import ObjectOperator, ObjectOperatorMixin
|
|
@@ -21,7 +23,30 @@ from ...serialization.serializables import (
|
|
|
21
23
|
FunctionField,
|
|
22
24
|
TupleField,
|
|
23
25
|
)
|
|
26
|
+
from ...udf import BuiltinFunction
|
|
24
27
|
from ...utils import find_objects, replace_objects
|
|
28
|
+
from ..core import Model, ModelData
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ModelWithEvalData(ModelData):
|
|
32
|
+
__slots__ = ("_evals_result",)
|
|
33
|
+
|
|
34
|
+
_evals_result: dict
|
|
35
|
+
|
|
36
|
+
def __init__(self, *args, evals_result=None, **kwargs):
|
|
37
|
+
super().__init__(*args, **kwargs)
|
|
38
|
+
self._evals_result = evals_result if evals_result is not None else dict()
|
|
39
|
+
|
|
40
|
+
def execute(self, session=None, **kw):
|
|
41
|
+
# The evals_result should be fetched when execute() is called.
|
|
42
|
+
result = super().execute(session=session, **kw)
|
|
43
|
+
if self.op.has_evals_result and self.key == self.op.outputs[0].key:
|
|
44
|
+
self._evals_result.update(self.op.outputs[1].fetch(session=session))
|
|
45
|
+
return result
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ModelWithEval(Model):
|
|
49
|
+
pass
|
|
25
50
|
|
|
26
51
|
|
|
27
52
|
class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
|
|
@@ -29,7 +54,7 @@ class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
|
|
|
29
54
|
|
|
30
55
|
data = AnyField("data")
|
|
31
56
|
|
|
32
|
-
def __call__(self, model_cls):
|
|
57
|
+
def __call__(self, model_cls: Type[ModelWithEval]):
|
|
33
58
|
self._output_types = [OutputType.object]
|
|
34
59
|
return self.new_tileable(None, object_class=model_cls)
|
|
35
60
|
|
|
@@ -48,14 +73,18 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
|
|
|
48
73
|
self._output_types = list(output_types)
|
|
49
74
|
super().__init__(**kwargs)
|
|
50
75
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
76
|
+
def has_custom_code(self) -> bool:
|
|
77
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
78
|
+
|
|
79
|
+
@classmethod
|
|
80
|
+
def _set_inputs(cls, op: "ModelApplyChunk", inputs):
|
|
81
|
+
super()._set_inputs(op, inputs)
|
|
82
|
+
old_inputs = find_objects(op.args, ENTITY_TYPE) + find_objects(
|
|
83
|
+
op.kwargs, ENTITY_TYPE
|
|
55
84
|
)
|
|
56
|
-
mapping = {o: n for o, n in zip(old_inputs,
|
|
57
|
-
|
|
58
|
-
|
|
85
|
+
mapping = {o: n for o, n in zip(old_inputs, op._inputs[1:])}
|
|
86
|
+
op.args = replace_objects(op.args, mapping)
|
|
87
|
+
op.kwargs = replace_objects(op.kwargs, mapping)
|
|
59
88
|
|
|
60
89
|
@property
|
|
61
90
|
def output_limit(self) -> int:
|
|
@@ -72,6 +101,6 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
|
|
|
72
101
|
return self.new_tileables(inputs, kws=output_kws)
|
|
73
102
|
|
|
74
103
|
|
|
75
|
-
def to_remote_model(model, model_cls):
|
|
104
|
+
def to_remote_model(model, model_cls: Type[ModelWithEval]) -> ModelWithEval:
|
|
76
105
|
op = ModelDataSource(data=model)
|
|
77
106
|
return op(model_cls)
|
maxframe/learn/contrib/utils.py
CHANGED
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
import sys
|
|
16
16
|
|
|
17
|
+
from ...serialization.serializables import Serializable
|
|
18
|
+
|
|
17
19
|
|
|
18
20
|
def make_import_error_func(package_name):
|
|
19
21
|
def _func(*_, **__): # pragma: no cover
|
|
@@ -51,3 +53,56 @@ def config_mod_getattr(mod_dict, globals_):
|
|
|
51
53
|
"__warningregistry__": dict(),
|
|
52
54
|
}
|
|
53
55
|
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TrainingCallback(Serializable):
|
|
59
|
+
_local_cls = None
|
|
60
|
+
|
|
61
|
+
@classmethod
|
|
62
|
+
def _load_local_to_remote_mapping(cls, globals_dict):
|
|
63
|
+
if cls._local_to_remote:
|
|
64
|
+
return
|
|
65
|
+
for v in globals_dict.values():
|
|
66
|
+
if isinstance(v, type) and issubclass(v, cls) and v._local_cls is not None:
|
|
67
|
+
cls._local_to_remote[v._local_cls] = v
|
|
68
|
+
|
|
69
|
+
@classmethod
|
|
70
|
+
def from_local(cls, callback_obj):
|
|
71
|
+
if isinstance(callback_obj, (list, tuple)):
|
|
72
|
+
return [cls.from_local(x) for x in callback_obj]
|
|
73
|
+
if not type(callback_obj) in cls._local_to_remote:
|
|
74
|
+
return callback_obj
|
|
75
|
+
|
|
76
|
+
kw = {}
|
|
77
|
+
remote_cls = cls._local_to_remote[type(callback_obj)]
|
|
78
|
+
for attr in remote_cls._FIELDS:
|
|
79
|
+
try:
|
|
80
|
+
kw[attr] = getattr(callback_obj, attr)
|
|
81
|
+
except AttributeError:
|
|
82
|
+
pass
|
|
83
|
+
return remote_cls(**kw)
|
|
84
|
+
|
|
85
|
+
def has_custom_code(self) -> bool:
|
|
86
|
+
return False
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def remote_to_local(cls, remote_obj):
|
|
90
|
+
if isinstance(remote_obj, (list, tuple)):
|
|
91
|
+
return [cls.remote_to_local(x) for x in remote_obj]
|
|
92
|
+
if not isinstance(remote_obj, TrainingCallback):
|
|
93
|
+
return remote_obj
|
|
94
|
+
return remote_obj.to_local()
|
|
95
|
+
|
|
96
|
+
def _extract_kw(self) -> dict:
|
|
97
|
+
kw = {}
|
|
98
|
+
for attr in type(self)._FIELDS:
|
|
99
|
+
val = getattr(self, attr, None)
|
|
100
|
+
if val is not None:
|
|
101
|
+
kw[attr] = val
|
|
102
|
+
return kw
|
|
103
|
+
|
|
104
|
+
def to_local(self):
|
|
105
|
+
return type(self)._local_cls(**self._extract_kw())
|
|
106
|
+
|
|
107
|
+
def __call__(self, *args, **kwargs):
|
|
108
|
+
return self.to_local()(*args, **kwargs)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Callable, Optional, Sequence, Union
|
|
16
|
+
|
|
17
|
+
from ....serialization.serializables import (
|
|
18
|
+
AnyField,
|
|
19
|
+
BoolField,
|
|
20
|
+
Float32Field,
|
|
21
|
+
Int32Field,
|
|
22
|
+
StringField,
|
|
23
|
+
)
|
|
24
|
+
from ....udf import BuiltinFunction
|
|
25
|
+
from ..utils import TrainingCallback
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from xgboost.callback import EarlyStopping as _EarlyStopping
|
|
29
|
+
from xgboost.callback import LearningRateScheduler as _LearningRateScheduler
|
|
30
|
+
except ImportError:
|
|
31
|
+
_LearningRateScheduler = _EarlyStopping = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class XGBTrainingCallback(TrainingCallback):
|
|
35
|
+
_local_to_remote = {}
|
|
36
|
+
|
|
37
|
+
@classmethod
|
|
38
|
+
def from_local(cls, callback_obj):
|
|
39
|
+
cls._load_local_to_remote_mapping(globals())
|
|
40
|
+
return super().from_local(callback_obj)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LearningRateScheduler(XGBTrainingCallback):
|
|
44
|
+
_local_cls = _LearningRateScheduler
|
|
45
|
+
|
|
46
|
+
learning_rates = AnyField("learning_rates", default=None)
|
|
47
|
+
|
|
48
|
+
def __init__(
|
|
49
|
+
self, learning_rates: Union[Callable[[int], float], Sequence[float]], **kw
|
|
50
|
+
) -> None:
|
|
51
|
+
super().__init__(learning_rates=learning_rates, **kw)
|
|
52
|
+
|
|
53
|
+
def has_custom_code(self) -> bool:
|
|
54
|
+
return not isinstance(self.learning_rates, (tuple, list, BuiltinFunction))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class EarlyStopping(XGBTrainingCallback):
|
|
58
|
+
_local_cls = _EarlyStopping
|
|
59
|
+
|
|
60
|
+
rounds = Int32Field("rounds")
|
|
61
|
+
metric_name = StringField("metric_name", default=None)
|
|
62
|
+
data_name = StringField("data_name", default=None)
|
|
63
|
+
maximize = BoolField("maximize", default=None)
|
|
64
|
+
save_best = BoolField("save_best", default=None)
|
|
65
|
+
min_delta = Float32Field("min_delta", default=None)
|
|
66
|
+
|
|
67
|
+
def __init__(
|
|
68
|
+
self,
|
|
69
|
+
*,
|
|
70
|
+
rounds: int,
|
|
71
|
+
metric_name: Optional[str] = None,
|
|
72
|
+
data_name: Optional[str] = None,
|
|
73
|
+
maximize: Optional[bool] = None,
|
|
74
|
+
save_best: Optional[bool] = False,
|
|
75
|
+
min_delta: float = 0.0,
|
|
76
|
+
**kw
|
|
77
|
+
) -> None:
|
|
78
|
+
super().__init__(
|
|
79
|
+
rounds=rounds,
|
|
80
|
+
metric_name=metric_name,
|
|
81
|
+
data_name=data_name,
|
|
82
|
+
maximize=maximize,
|
|
83
|
+
save_best=save_best,
|
|
84
|
+
min_delta=min_delta,
|
|
85
|
+
**kw
|
|
86
|
+
)
|
|
@@ -26,9 +26,7 @@ if not xgboost:
|
|
|
26
26
|
else:
|
|
27
27
|
from xgboost.sklearn import XGBClassifierBase
|
|
28
28
|
|
|
29
|
-
from .core import wrap_evaluation_matrices
|
|
30
29
|
from .predict import predict
|
|
31
|
-
from .train import train
|
|
32
30
|
|
|
33
31
|
class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
|
|
34
32
|
"""
|
|
@@ -43,6 +41,15 @@ else:
|
|
|
43
41
|
super().__init__(**kwargs)
|
|
44
42
|
self._set_model(xgb_model)
|
|
45
43
|
|
|
44
|
+
def get_xgb_params(self):
|
|
45
|
+
params = super().get_xgb_params()
|
|
46
|
+
if self.n_classes_ > 2:
|
|
47
|
+
params["objective"] = "multi:softprob"
|
|
48
|
+
params["num_class"] = self.n_classes_
|
|
49
|
+
else:
|
|
50
|
+
params["objective"] = "binary:logistic"
|
|
51
|
+
return params
|
|
52
|
+
|
|
46
53
|
def fit(
|
|
47
54
|
self,
|
|
48
55
|
X,
|
|
@@ -50,43 +57,32 @@ else:
|
|
|
50
57
|
sample_weight=None,
|
|
51
58
|
base_margin=None,
|
|
52
59
|
eval_set=None,
|
|
60
|
+
xgb_model=None,
|
|
53
61
|
sample_weight_eval_set=None,
|
|
54
62
|
base_margin_eval_set=None,
|
|
55
63
|
num_class=None,
|
|
56
64
|
**kw,
|
|
57
65
|
):
|
|
58
66
|
session = kw.pop("session", None)
|
|
59
|
-
run_kwargs = kw.pop("run_kwargs",
|
|
60
|
-
|
|
61
|
-
|
|
67
|
+
run_kwargs = kw.pop("run_kwargs", dict())
|
|
68
|
+
|
|
69
|
+
if num_class is not None:
|
|
70
|
+
self.n_classes_ = num_class
|
|
71
|
+
else:
|
|
72
|
+
t_labels = mt.unique(y).execute(session=session, **run_kwargs)
|
|
73
|
+
self.n_classes_ = t_labels.shape[0]
|
|
74
|
+
|
|
75
|
+
super().fit(
|
|
62
76
|
X,
|
|
63
77
|
y,
|
|
64
|
-
sample_weight,
|
|
65
|
-
base_margin,
|
|
66
|
-
eval_set,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
self._n_features_in = X.shape[1]
|
|
72
|
-
self.n_classes_ = num_class or 1
|
|
73
|
-
if self.n_classes_ > 2:
|
|
74
|
-
params["objective"] = "multi:softprob"
|
|
75
|
-
params["num_class"] = self.n_classes_
|
|
76
|
-
else:
|
|
77
|
-
params["objective"] = "binary:logistic"
|
|
78
|
-
self.evals_result_ = dict()
|
|
79
|
-
result = train(
|
|
80
|
-
params,
|
|
81
|
-
dtrain,
|
|
82
|
-
num_boost_round=self.get_num_boosting_rounds(),
|
|
83
|
-
evals=evals,
|
|
84
|
-
evals_result=self.evals_result_,
|
|
85
|
-
num_class=num_class,
|
|
86
|
-
session=session,
|
|
87
|
-
run_kwargs=run_kwargs,
|
|
78
|
+
sample_weight=sample_weight,
|
|
79
|
+
base_margin=base_margin,
|
|
80
|
+
eval_set=eval_set,
|
|
81
|
+
xgb_model=xgb_model,
|
|
82
|
+
sample_weight_eval_set=sample_weight_eval_set,
|
|
83
|
+
base_margin_eval_set=base_margin_eval_set,
|
|
84
|
+
**kw,
|
|
88
85
|
)
|
|
89
|
-
self._Booster = result
|
|
90
86
|
return self
|
|
91
87
|
|
|
92
88
|
def predict(self, data, **kw):
|
|
@@ -19,26 +19,19 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
|
19
19
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
|
|
22
|
+
from ....errors import TileableNotExecutedError
|
|
23
|
+
|
|
22
24
|
try:
|
|
23
25
|
import xgboost
|
|
24
26
|
except ImportError:
|
|
25
27
|
xgboost = None
|
|
26
28
|
|
|
27
29
|
from ....core import OutputType
|
|
28
|
-
from
|
|
29
|
-
from ..models import ModelApplyChunk, to_remote_model
|
|
30
|
+
from ..models import ModelApplyChunk, ModelWithEval, ModelWithEvalData, to_remote_model
|
|
30
31
|
from .dmatrix import DMatrix
|
|
31
32
|
|
|
32
33
|
|
|
33
|
-
class BoosterData(
|
|
34
|
-
__slots__ = ("_evals_result",)
|
|
35
|
-
|
|
36
|
-
_evals_result: Dict
|
|
37
|
-
|
|
38
|
-
def __init__(self, *args, evals_result=None, **kwargs):
|
|
39
|
-
super().__init__(*args, **kwargs)
|
|
40
|
-
self._evals_result = evals_result if evals_result is not None else dict()
|
|
41
|
-
|
|
34
|
+
class BoosterData(ModelWithEvalData):
|
|
42
35
|
@staticmethod
|
|
43
36
|
def _get_booster_score(bst, fmap=None, importance_type="weight"):
|
|
44
37
|
if not fmap:
|
|
@@ -66,13 +59,6 @@ class BoosterData(ModelData):
|
|
|
66
59
|
fmap_data = fmap_file.read()
|
|
67
60
|
return op(self, [{}], fmap=fmap_data, importance_type=importance_type)[0]
|
|
68
61
|
|
|
69
|
-
def execute(self, session=None, **kw):
|
|
70
|
-
# The evals_result should be fetched when BoosterData.execute() is called.
|
|
71
|
-
result = super().execute(session=session, **kw)
|
|
72
|
-
if self.op.has_evals_result and self.key == self.op.outputs[0].key:
|
|
73
|
-
self._evals_result.update(self.op.outputs[1].fetch(session=session))
|
|
74
|
-
return result
|
|
75
|
-
|
|
76
62
|
def predict(
|
|
77
63
|
self,
|
|
78
64
|
data,
|
|
@@ -103,7 +89,7 @@ class BoosterData(ModelData):
|
|
|
103
89
|
)
|
|
104
90
|
|
|
105
91
|
|
|
106
|
-
class Booster(
|
|
92
|
+
class Booster(ModelWithEval):
|
|
107
93
|
pass
|
|
108
94
|
|
|
109
95
|
|
|
@@ -137,39 +123,65 @@ else:
|
|
|
137
123
|
return names
|
|
138
124
|
|
|
139
125
|
def __repr__(self):
|
|
140
|
-
|
|
141
|
-
|
|
126
|
+
try:
|
|
127
|
+
local_model = self.fetch()
|
|
128
|
+
return repr(local_model)
|
|
129
|
+
except TileableNotExecutedError:
|
|
130
|
+
return f"<{type(self).__name__} unexecuted>"
|
|
142
131
|
|
|
143
132
|
def fit(
|
|
144
133
|
self,
|
|
145
134
|
X,
|
|
146
135
|
y,
|
|
147
|
-
|
|
136
|
+
sample_weight=None,
|
|
137
|
+
base_margin=None,
|
|
148
138
|
eval_set=None,
|
|
139
|
+
xgb_model=None,
|
|
149
140
|
sample_weight_eval_set=None,
|
|
141
|
+
base_margin_eval_set=None,
|
|
150
142
|
**kw,
|
|
151
143
|
):
|
|
152
|
-
|
|
153
|
-
Fit the regressor. Note that fit() is an eager-execution
|
|
154
|
-
API. The call will be blocked until training finished.
|
|
144
|
+
from .train import train
|
|
155
145
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
146
|
+
session = kw.pop("session", None)
|
|
147
|
+
run_kwargs = kw.pop("run_kwargs", dict())
|
|
148
|
+
|
|
149
|
+
self._n_features_in = X.shape[1]
|
|
150
|
+
|
|
151
|
+
dtrain, evals = wrap_evaluation_matrices(
|
|
152
|
+
None,
|
|
153
|
+
X,
|
|
154
|
+
y,
|
|
155
|
+
sample_weight,
|
|
156
|
+
base_margin,
|
|
157
|
+
eval_set,
|
|
158
|
+
sample_weight_eval_set,
|
|
159
|
+
base_margin_eval_set,
|
|
160
|
+
)
|
|
161
|
+
params = self.get_xgb_params()
|
|
162
|
+
if not params.get("objective"):
|
|
163
|
+
params["objective"] = "reg:squarederror"
|
|
164
|
+
self.evals_result_ = dict()
|
|
165
|
+
train_kw = {}
|
|
166
|
+
train_kw.update(kw)
|
|
167
|
+
|
|
168
|
+
if getattr(self, "n_classes_", None):
|
|
169
|
+
train_kw["num_class"] = self.n_classes_
|
|
170
|
+
|
|
171
|
+
result = train(
|
|
172
|
+
params,
|
|
173
|
+
dtrain,
|
|
174
|
+
num_boost_round=self.get_num_boosting_rounds(),
|
|
175
|
+
evals=evals,
|
|
176
|
+
evals_result=self.evals_result_,
|
|
177
|
+
xgb_model=xgb_model,
|
|
178
|
+
callbacks=self.callbacks,
|
|
179
|
+
session=session,
|
|
180
|
+
run_kwargs=run_kwargs,
|
|
181
|
+
**train_kw,
|
|
182
|
+
)
|
|
183
|
+
self._Booster = result
|
|
184
|
+
return self
|
|
173
185
|
|
|
174
186
|
def predict(self, data, **kw):
|
|
175
187
|
"""
|