maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -20,18 +20,34 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
|
|
22
22
|
from ....errors import TileableNotExecutedError
|
|
23
|
+
from ....udf import builtin_function
|
|
23
24
|
|
|
24
25
|
try:
|
|
25
26
|
import xgboost
|
|
26
|
-
except ImportError:
|
|
27
|
+
except ImportError: # pragma: no cover
|
|
27
28
|
xgboost = None
|
|
28
29
|
|
|
29
|
-
from ....core import OutputType
|
|
30
|
+
from ....core import OutputType, enter_mode, is_kernel_mode
|
|
31
|
+
from ...utils.odpsio import ODPSModelMixin, ReadODPSModel
|
|
30
32
|
from ..models import ModelApplyChunk, ModelWithEval, ModelWithEvalData, to_remote_model
|
|
31
33
|
from .dmatrix import DMatrix
|
|
32
34
|
|
|
35
|
+
_xgb_type_to_np_type = {
|
|
36
|
+
"float": "float32",
|
|
37
|
+
"int": "int32",
|
|
38
|
+
"i": "bool",
|
|
39
|
+
}
|
|
40
|
+
|
|
33
41
|
|
|
34
42
|
class BoosterData(ModelWithEvalData):
|
|
43
|
+
def save_config(self) -> str:
|
|
44
|
+
try:
|
|
45
|
+
return self.fetch().save_config()
|
|
46
|
+
except:
|
|
47
|
+
if is_kernel_mode():
|
|
48
|
+
return "{}"
|
|
49
|
+
raise
|
|
50
|
+
|
|
35
51
|
@staticmethod
|
|
36
52
|
def _get_booster_score(bst, fmap=None, importance_type="weight"):
|
|
37
53
|
if not fmap:
|
|
@@ -88,6 +104,58 @@ class BoosterData(ModelWithEvalData):
|
|
|
88
104
|
strict_shape=strict_shape,
|
|
89
105
|
)
|
|
90
106
|
|
|
107
|
+
@staticmethod
|
|
108
|
+
@builtin_function
|
|
109
|
+
def _get_training_info(bst, evals_result, local_info):
|
|
110
|
+
model_infos = {
|
|
111
|
+
"iteration": bst.num_boosted_rounds(),
|
|
112
|
+
}
|
|
113
|
+
if evals_result:
|
|
114
|
+
model_infos.update(
|
|
115
|
+
dict(
|
|
116
|
+
duration_ms=evals_result.get("duration_ms"),
|
|
117
|
+
)
|
|
118
|
+
)
|
|
119
|
+
if bst.feature_names:
|
|
120
|
+
model_infos["feature_names"] = bst.feature_names
|
|
121
|
+
model_infos["feature_types"] = [
|
|
122
|
+
_xgb_type_to_np_type[x] for x in bst.feature_types
|
|
123
|
+
]
|
|
124
|
+
model_infos.update(local_info or {})
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
config = json.loads(bst.save_config())
|
|
128
|
+
stack = [config]
|
|
129
|
+
internal = {}
|
|
130
|
+
while stack:
|
|
131
|
+
obj = stack.pop()
|
|
132
|
+
for k, v in obj.items():
|
|
133
|
+
if k.endswith("_param"):
|
|
134
|
+
for p_k, p_v in v.items():
|
|
135
|
+
internal[p_k] = p_v
|
|
136
|
+
elif isinstance(v, dict):
|
|
137
|
+
stack.append(v)
|
|
138
|
+
|
|
139
|
+
for k, v in internal.items():
|
|
140
|
+
for t in (int, float, str):
|
|
141
|
+
try:
|
|
142
|
+
model_infos[k] = t(v)
|
|
143
|
+
break
|
|
144
|
+
except ValueError:
|
|
145
|
+
continue
|
|
146
|
+
except ValueError:
|
|
147
|
+
pass
|
|
148
|
+
|
|
149
|
+
return model_infos
|
|
150
|
+
|
|
151
|
+
def get_training_info(self, evals_result: dict = None, local_info: dict = None):
|
|
152
|
+
evals_result = getattr(self, "_evals_result", None) or evals_result
|
|
153
|
+
args = (evals_result, local_info)
|
|
154
|
+
op = ModelApplyChunk(
|
|
155
|
+
func=self._get_training_info, output_types=[OutputType.object]
|
|
156
|
+
)
|
|
157
|
+
return op(self, [{}], args=args)[0]
|
|
158
|
+
|
|
91
159
|
|
|
92
160
|
class Booster(ModelWithEval):
|
|
93
161
|
pass
|
|
@@ -97,7 +165,7 @@ if not xgboost:
|
|
|
97
165
|
XGBScikitLearnBase = None
|
|
98
166
|
else:
|
|
99
167
|
|
|
100
|
-
class XGBScikitLearnBase(xgboost.XGBModel):
|
|
168
|
+
class XGBScikitLearnBase(xgboost.XGBModel, ODPSModelMixin):
|
|
101
169
|
"""
|
|
102
170
|
Base class for implementing scikit-learn interface
|
|
103
171
|
"""
|
|
@@ -158,7 +226,8 @@ else:
|
|
|
158
226
|
sample_weight_eval_set,
|
|
159
227
|
base_margin_eval_set,
|
|
160
228
|
)
|
|
161
|
-
|
|
229
|
+
with enter_mode(kernel=True):
|
|
230
|
+
params = self.get_xgb_params()
|
|
162
231
|
if not params.get("objective"):
|
|
163
232
|
params["objective"] = "reg:squarederror"
|
|
164
233
|
self.evals_result_ = dict()
|
|
@@ -181,6 +250,7 @@ else:
|
|
|
181
250
|
**train_kw,
|
|
182
251
|
)
|
|
183
252
|
self._Booster = result
|
|
253
|
+
self.evals_result_t_ = result.op.outputs[-1]
|
|
184
254
|
return self
|
|
185
255
|
|
|
186
256
|
def predict(self, data, **kw):
|
|
@@ -276,6 +346,45 @@ else:
|
|
|
276
346
|
n_features=self._n_features_in,
|
|
277
347
|
)[0]
|
|
278
348
|
|
|
349
|
+
@property
|
|
350
|
+
def training_info_(self):
|
|
351
|
+
local_info = {}
|
|
352
|
+
attrs = [
|
|
353
|
+
"n_classes_",
|
|
354
|
+
"learning_rate",
|
|
355
|
+
]
|
|
356
|
+
for attr in attrs:
|
|
357
|
+
if getattr(self, attr, None):
|
|
358
|
+
local_info[attr] = getattr(self, attr)
|
|
359
|
+
return self._Booster.get_training_info(
|
|
360
|
+
evals_result=self.evals_result_t_, local_info=local_info
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
def _get_odps_model_info(self) -> ODPSModelMixin.ODPSModelInfo:
|
|
364
|
+
model_format = (
|
|
365
|
+
"BOOSTED_TREE_CLASSIFIER"
|
|
366
|
+
if hasattr(self, "predict_proba")
|
|
367
|
+
else "BOOSTED_TREE_REGRESSOR"
|
|
368
|
+
)
|
|
369
|
+
return ODPSModelMixin.ODPSModelInfo(
|
|
370
|
+
model_format=model_format, model_params=self._Booster
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
@classmethod
|
|
374
|
+
def _build_odps_source_model(cls, op: ReadODPSModel) -> Any:
|
|
375
|
+
if not (
|
|
376
|
+
op.format == "BOOSTED_TREE_CLASSIFIER" and hasattr(cls, "predict_proba")
|
|
377
|
+
) and not (
|
|
378
|
+
op.format == "BOOSTED_TREE_REGRESSOR"
|
|
379
|
+
and not hasattr(cls, "predict_proba")
|
|
380
|
+
):
|
|
381
|
+
return None
|
|
382
|
+
op._output_types = [OutputType.object]
|
|
383
|
+
booster = op.new_tileable(None, object_class=Booster)
|
|
384
|
+
estimator = cls()
|
|
385
|
+
estimator._Booster = booster
|
|
386
|
+
return estimator
|
|
387
|
+
|
|
279
388
|
def wrap_evaluation_matrices(
|
|
280
389
|
missing: float,
|
|
281
390
|
X: Any,
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from typing import List
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
18
19
|
|
|
19
20
|
from .... import opcodes
|
|
20
21
|
from ....core import EntityData
|
|
@@ -62,9 +63,10 @@ class XGBPredict(Operator, TileableOperatorMixin):
|
|
|
62
63
|
|
|
63
64
|
def __call__(self):
|
|
64
65
|
num_class = getattr(self.model.op, "num_class", None)
|
|
65
|
-
|
|
66
|
+
output_ndim = getattr(self.model.op, "output_ndim", None)
|
|
67
|
+
if num_class is not None and not pd.isna(num_class):
|
|
66
68
|
num_class = int(num_class)
|
|
67
|
-
if num_class is not None and num_class > 2:
|
|
69
|
+
if num_class is not None and (num_class > 2 or output_ndim == 2):
|
|
68
70
|
shape = (self.data.shape[0], num_class)
|
|
69
71
|
else:
|
|
70
72
|
shape = (self.data.shape[0],)
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Union
|
|
16
16
|
|
|
17
|
+
from ...utils.odpsio import register_odps_model
|
|
17
18
|
from ..utils import make_import_error_func
|
|
18
19
|
from .core import XGBScikitLearnBase, xgboost
|
|
19
20
|
|
|
@@ -24,6 +25,7 @@ else:
|
|
|
24
25
|
|
|
25
26
|
from .predict import predict
|
|
26
27
|
|
|
28
|
+
@register_odps_model
|
|
27
29
|
class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
|
|
28
30
|
"""
|
|
29
31
|
Implementation of the scikit-learn API for XGBoost regressor.
|
|
@@ -69,6 +71,9 @@ else:
|
|
|
69
71
|
A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
|
|
70
72
|
of group weights on the i-th validation set.
|
|
71
73
|
"""
|
|
74
|
+
if y.ndim == 2:
|
|
75
|
+
kw["num_class"] = y.shape[1]
|
|
76
|
+
kw["output_ndim"] = 2
|
|
72
77
|
super().fit(
|
|
73
78
|
X,
|
|
74
79
|
y,
|
|
@@ -25,6 +25,7 @@ from ....serialization.serializables import (
|
|
|
25
25
|
DictField,
|
|
26
26
|
FieldTypes,
|
|
27
27
|
FunctionField,
|
|
28
|
+
Int16Field,
|
|
28
29
|
Int64Field,
|
|
29
30
|
KeyField,
|
|
30
31
|
ListField,
|
|
@@ -50,7 +51,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
|
|
|
50
51
|
dtrain = KeyField("dtrain", default=None)
|
|
51
52
|
evals = ListField("evals", on_serialize=_on_serialize_evals, default=None)
|
|
52
53
|
obj = FunctionField("obj", default=None)
|
|
53
|
-
feval = FunctionField("
|
|
54
|
+
feval = FunctionField("feval", default=None)
|
|
54
55
|
maximize = BoolField("maximize", default=None)
|
|
55
56
|
early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
|
|
56
57
|
verbose_eval = AnyField("verbose_eval", default=None)
|
|
@@ -64,8 +65,12 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
|
|
|
64
65
|
custom_metric = FunctionField("custom_metric", default=None)
|
|
65
66
|
num_boost_round = Int64Field("num_boost_round", default=10)
|
|
66
67
|
num_class = Int64Field("num_class", default=None)
|
|
68
|
+
_has_evals_result = BoolField("has_evals_result", default=False)
|
|
69
|
+
output_ndim = Int16Field("output_ndim", default=None)
|
|
67
70
|
|
|
68
71
|
def __init__(self, gpu=None, **kw):
|
|
72
|
+
if kw.get("evals_result") is not None:
|
|
73
|
+
kw["_has_evals_result"] = True
|
|
69
74
|
super().__init__(gpu=gpu, **kw)
|
|
70
75
|
if self.output_types is None:
|
|
71
76
|
self.output_types = [OutputType.object]
|
|
@@ -110,7 +115,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
|
|
|
110
115
|
|
|
111
116
|
@property
|
|
112
117
|
def has_evals_result(self) -> bool:
|
|
113
|
-
return self.evals
|
|
118
|
+
return self._has_evals_result or self.evals
|
|
114
119
|
|
|
115
120
|
|
|
116
121
|
def _get_xgb_booster(xgb_model):
|
maxframe/learn/core.py
CHANGED
|
@@ -222,6 +222,41 @@ class TransformerMixin:
|
|
|
222
222
|
return self.fit(X, y, **fit_params).transform(X)
|
|
223
223
|
|
|
224
224
|
|
|
225
|
+
class ClassifierMixin:
    """Mixin class for all classifiers in scikit-learn."""

    # Marker identifying the estimator family.
    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None):
        """
        Compute the mean accuracy of ``self.predict(X)`` against ``y``.

        For multi-label problems this is the subset accuracy: a sample only
        counts as correct when its whole label set is predicted exactly,
        which makes it a harsh metric.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : Tensor
            Mean accuracy of self.predict(X) wrt. y.
        """
        # Imported lazily inside the method, as in the original code.
        from .metrics import accuracy_score

        predicted = self.predict(X)
        return accuracy_score(y, predicted, sample_weight=sample_weight)
|
|
258
|
+
|
|
259
|
+
|
|
225
260
|
class RegressorMixin:
|
|
226
261
|
"""Mixin class for all regression estimators in scikit-learn."""
|
|
227
262
|
|
|
@@ -276,3 +311,34 @@ class RegressorMixin:
|
|
|
276
311
|
|
|
277
312
|
def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
|
|
278
313
|
return {"requires_y": True}
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class ClusterMixin:
    """Mixin class for all cluster estimators in scikit-learn."""

    # Marker identifying the estimator family.
    _estimator_type = "clusterer"

    def fit_predict(self, X, y=None):
        """
        Fit the clusterer on `X` and hand back the resulting labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        labels : ndarray of shape (n_samples,), dtype=np.int64
            Cluster labels.
        """
        # Generic fallback: fit, then read the labels stored on the
        # estimator. Algorithms with a cheaper path should override this.
        self.fit(X)
        labels = self.labels_
        return labels

    def _more_tags(self):
        """Return the extra estimator tags declared by this mixin."""
        tags = {"preserves_dtype": []}
        return tags
|
|
@@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod
|
|
|
17
17
|
|
|
18
18
|
from ... import tensor as mt
|
|
19
19
|
from ...tensor.datasource import tensor as astensor
|
|
20
|
-
from ..core import BaseEstimator
|
|
20
|
+
from ..core import BaseEstimator, ClassifierMixin
|
|
21
21
|
from ..preprocessing import normalize as f_normalize
|
|
22
22
|
from ..utils.validation import FLOAT_DTYPES, check_array
|
|
23
23
|
|
|
@@ -161,3 +161,60 @@ class LinearModel(BaseEstimator, metaclass=ABCMeta):
|
|
|
161
161
|
|
|
162
162
|
def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
|
|
163
163
|
return {"requires_y": True}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class LinearClassifierMixin(ClassifierMixin):
    """Mixin for linear classifiers.

    Handles prediction for sparse and dense X.
    """

    def decision_function(self, X):
        """
        Predict confidence scores for samples.

        The confidence score for a sample is proportional to the signed
        distance of that sample to the hyperplane.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
            Confidence scores per (sample, class) combination. In the binary
            case, confidence score for self.classes_[1] where >0 means this
            class would be predicted.

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from ``coef_``.
        """
        check_is_fitted(self)

        X = check_array(X, accept_sparse="csr")

        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                "X has %d features per sample; expecting %d" % (X.shape[1], n_features)
            )

        scores = mt.dot(X, self.coef_.T) + self.intercept_
        # A single coefficient row yields an (n_samples, 1) matrix; flatten
        # it so the binary case has the documented (n_samples,) shape.
        return scores.ravel() if scores.shape[1] == 1 else scores

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape [n_samples]
            Predicted class label per sample.
        """
        scores = self.decision_function(X)
        if scores.ndim == 1:
            # Binary case: a positive score selects classes_[1]. Taking
            # argmax over a single column would always select classes_[0].
            indices = (scores > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices].execute()
|
|
@@ -22,7 +22,7 @@ from ._base import LinearModel, _rescale_data
|
|
|
22
22
|
try:
|
|
23
23
|
from sklearn.base import MultiOutputMixin
|
|
24
24
|
except ImportError:
|
|
25
|
-
MultiOutputMixin = object
|
|
25
|
+
MultiOutputMixin = type("MultiOutputMixin", (object,), {})
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
|
|
@@ -17,9 +17,15 @@ from ._classification import (
|
|
|
17
17
|
accuracy_score,
|
|
18
18
|
f1_score,
|
|
19
19
|
fbeta_score,
|
|
20
|
+
log_loss,
|
|
20
21
|
multilabel_confusion_matrix,
|
|
21
22
|
precision_recall_fscore_support,
|
|
22
23
|
precision_score,
|
|
23
24
|
recall_score,
|
|
24
25
|
)
|
|
26
|
+
from ._ranking import auc, roc_auc_score, roc_curve
|
|
25
27
|
from ._regression import r2_score
|
|
28
|
+
from .pairwise import pairwise_distances
|
|
29
|
+
|
|
30
|
+
# isort: off
|
|
31
|
+
from ._scorer import get_scorer
|
|
@@ -33,6 +33,7 @@ from ...serialization.serializables import (
|
|
|
33
33
|
from ...tensor.core import TensorOrder
|
|
34
34
|
from ...typing_ import EntityType
|
|
35
35
|
from ..core import LearnOperatorMixin
|
|
36
|
+
from ..utils import check_array, check_consistent_length
|
|
36
37
|
from ._check_targets import _check_targets
|
|
37
38
|
|
|
38
39
|
|
|
@@ -163,6 +164,150 @@ def accuracy_score(
|
|
|
163
164
|
return score.execute(session=session, **(run_kwargs or dict()))
|
|
164
165
|
|
|
165
166
|
|
|
167
|
+
class LogLoss(Operator, LearnOperatorMixin):
    """Operator that produces a scalar tensor holding the log loss.

    ``y_true`` and ``y_pred`` are always operator inputs; ``sample_weight``
    and ``labels`` become inputs only when they are entities, otherwise they
    are kept as plain field values.
    """

    _op_type_ = opcodes.LOG_LOSS

    y_true = AnyField("y_true")
    y_pred = AnyField("y_pred")
    eps = Float64Field("eps", default=1e-15)
    normalize = BoolField("normalize", default=True)
    sample_weight = AnyField("sample_weight", default=None)
    labels = AnyField("labels", default=None)

    @classmethod
    def _set_inputs(cls, op: "LogLoss", inputs: List[EntityType]):
        """Rebind the input-backed fields of ``op`` from ``op.inputs``.

        The inputs are consumed in the same order ``__call__`` appended
        them: ``y_true``, ``y_pred``, then the optional ``sample_weight``
        and ``labels`` when those fields hold entities.
        """
        super()._set_inputs(op, inputs)
        inputs_iter = iter(op.inputs)
        op.y_true = next(inputs_iter)
        op.y_pred = next(inputs_iter)
        if isinstance(op.sample_weight, ENTITY_TYPE):
            op.sample_weight = next(inputs_iter)
        if isinstance(op.labels, ENTITY_TYPE):
            op.labels = next(inputs_iter)

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Build the scalar output tileable for the given arguments.

        Parameters
        ----------
        y_true, y_pred
            Ground-truth labels and predicted probabilities.
        sample_weight, labels : optional
            Stored on the operator; appended to the inputs only when they
            are entities.

        Returns
        -------
        A 0-d tileable (``shape=()``) in C order with a float dtype.
        """
        self._output_types = [OutputType.tensor]
        self.sample_weight = sample_weight
        self.labels = labels
        inputs = [y_true, y_pred]
        if isinstance(self.sample_weight, ENTITY_TYPE):
            inputs.append(self.sample_weight)
        if isinstance(self.labels, ENTITY_TYPE):
            inputs.append(self.labels)

        # Log loss is floating point whether it is averaged or summed, so
        # the dtype must not be derived from the label dtype: promoting
        # string labels (e.g. "spam"/"ham") with floats is wrong or fails.
        dtype = np.dtype(float)
        return self.new_tileable(
            inputs, dtype=dtype, shape=(), order=TensorOrder.C_ORDER
        )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def log_loss(
    y_true,
    y_pred,
    *,
    eps=1e-15,
    normalize=True,
    sample_weight=None,
    labels=None,
    execute=False,
    session=None,
    run_kwargs=None,
):
    r"""Log loss, aka logistic loss or cross-entropy loss.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of a logistic model that returns ``y_pred`` probabilities
    for its training data ``y_true``.
    The log loss is only defined for two or more labels.
    For a single sample with true label :math:`y \in \{0,1\}` and
    a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, the log
    loss is:

    .. math::
        L_{\log}(y, p) = -(y \log (p) + (1 - y) \log (1 - p))

    Read more in the :ref:`User Guide <log_loss>`.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. If ``y_pred.shape = (n_samples,)``
        the probabilities provided are assumed to be that of the
        positive class. The labels in ``y_pred`` are assumed to be
        ordered alphabetically, as done by
        :class:`preprocessing.LabelBinarizer`.

    eps : float, default=1e-15
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, default=True
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    labels : array-like, default=None
        If not provided, labels will be inferred from y_true. If ``labels``
        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
        assumed to be binary and are inferred from ``y_true``.

    execute : bool, default=False
        If true, call ``execute`` on the result before returning it.

    session : optional
        Forwarded to ``execute`` as ``session`` when ``execute`` is true.

    run_kwargs : dict, default=None
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    loss : Tensor
        Scalar tensor built by the ``LogLoss`` operator, or the value
        returned by its ``execute`` method when ``execute`` is true.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    Examples
    --------
    >>> from maxframe.learn.metrics import log_loss
    >>> log_loss(["spam", "ham", "ham", "spam"],
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.
    """
    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
        y_pred = mt.array(y_pred)
    # The guard must inspect ``sample_weight`` itself. It used to test
    # ``y_pred``, which is always converted by this point, so plain
    # sequences of weights were never turned into tensors.
    if sample_weight is not None and not isinstance(
        sample_weight, (ENTITY_TYPE, np.ndarray)
    ):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
        labels = mt.array(labels)

    y_pred = check_array(y_pred, ensure_2d=False)
    y_pred, y_true, sample_weight = check_consistent_length(
        y_pred, y_true, sample_weight
    )

    op = LogLoss(eps=eps, normalize=normalize)
    res = op(
        y_true=y_true,
        y_pred=y_pred,
        sample_weight=sample_weight,
        labels=labels,
    )
    if execute:
        return res.execute(session=session, **(run_kwargs or {}))
    return res
|
|
309
|
+
|
|
310
|
+
|
|
166
311
|
class MultiLabelConfusionMatrix(Operator, LearnOperatorMixin):
|
|
167
312
|
_op_type_ = opcodes.MULTILABEL_CONFUSION_MATRIX
|
|
168
313
|
|