maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE, EntityData, ExecutableTuple, OutputType
|
|
21
|
+
from ...core.operator import Operator
|
|
22
|
+
from ...serialization.serializables import (
|
|
23
|
+
AnyField,
|
|
24
|
+
BoolField,
|
|
25
|
+
Float64Field,
|
|
26
|
+
KeyField,
|
|
27
|
+
StringField,
|
|
28
|
+
)
|
|
29
|
+
from ...tensor import tensor as astensor
|
|
30
|
+
from ...utils import cache_tileables
|
|
31
|
+
from ..core import LearnOperatorMixin
|
|
32
|
+
from ..utils.validation import check_consistent_length, column_or_1d
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AucOp(Operator, LearnOperatorMixin):
    """Operator registered under ``opcodes.AUC`` taking two inputs, ``x`` and ``y``.

    Calling the operator produces a single scalar tileable of dtype
    ``float64`` with an empty shape.
    """

    _op_type_ = opcodes.AUC

    # Both fields default to None and are refreshed from the operator's
    # inputs in ``_set_inputs`` when they hold entities.
    x = KeyField("x", default=None)
    y = KeyField("y", default=None)

    @classmethod
    def _set_inputs(cls, op: "AucOp", inputs: List[EntityData]):
        """Rebind the entity-valued fields of ``op`` to items of ``inputs``.

        After the base implementation has run, every field among ``x`` and
        ``y`` that currently holds an entity is replaced, in that order, by
        the next item taken from ``inputs``.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(inputs)
        for field_name in ("x", "y"):
            # Non-entity fields do not consume an item from ``inputs``.
            if not isinstance(getattr(op, field_name, None), ENTITY_TYPE):
                continue
            setattr(op, field_name, next(remaining))

    def __call__(self, x, y):
        """Build the scalar output tileable from ``x`` and ``y``.

        Only arguments that are entities are registered as inputs of the
        resulting tileable.
        """
        self._output_types = [OutputType.scalar]
        entity_inputs = [arg for arg in (x, y) if isinstance(arg, ENTITY_TYPE)]
        return self.new_tileable(entity_inputs, shape=(), dtype=np.float64)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class RocAucScore(Operator, LearnOperatorMixin):
    """Operator registered under ``opcodes.ROC_AUC_SCORE``.

    The output is a ``float64`` tensor: zero-dimensional when ``average`` is
    set, otherwise one-dimensional with an unknown length.
    """

    _op_type_ = opcodes.ROC_AUC_SCORE

    # Fields that may hold entities are refreshed in ``_set_inputs``;
    # the remaining ones are plain configuration values.
    y_true = KeyField("y_true", default=None)
    y_score = KeyField("y_score", default=None)
    average = StringField("average", default="macro")
    sample_weight = KeyField("sample_weight", default=None)
    max_fpr = Float64Field("max_fpr", default=None)
    multi_class = StringField("multi_class", default="ovr")
    labels = AnyField("labels", default=None)

    @classmethod
    def _set_inputs(cls, op: "RocAucScore", inputs: List[EntityData]):
        """Rebind the entity-valued fields of ``op`` to items of ``inputs``.

        After the base implementation has run, every field among ``y_true``,
        ``y_score``, ``sample_weight`` and ``labels`` that currently holds an
        entity is replaced, in that order, by the next item of ``inputs``.
        """
        super()._set_inputs(op, inputs)
        pending = iter(inputs)
        for field_name in ("y_true", "y_score", "sample_weight", "labels"):
            # Fields holding plain values (or None) are left untouched.
            if not isinstance(getattr(op, field_name, None), ENTITY_TYPE):
                continue
            setattr(op, field_name, next(pending))

    def __call__(self, y_true, y_score, sample_weight=None, labels=None):
        """Build the output tensor for the given arguments.

        Only arguments that are entities are registered as inputs of the
        resulting tileable.
        """
        candidates = (y_true, y_score, sample_weight, labels)
        entity_inputs = [arg for arg in candidates if isinstance(arg, ENTITY_TYPE)]
        self._output_types = [OutputType.tensor]
        if self.average is None:
            # Without averaging the result length is not known up front.
            out_shape = (np.nan,)
        else:
            out_shape = ()
        return self.new_tileable(entity_inputs, shape=out_shape, dtype=np.float64)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class RocCurve(Operator, LearnOperatorMixin):
    """Operator registered under ``opcodes.ROC_CURVE`` with three outputs.

    Calling the operator yields three one-dimensional float tensors of
    unknown length (presumably fpr, tpr and thresholds, as in the
    ``roc_curve`` usage shown in this module's docs -- confirm against the
    executor).
    """

    _op_type_ = opcodes.ROC_CURVE

    # Fields that may hold entities are refreshed in ``_set_inputs``;
    # ``pos_label`` and ``drop_intermediate`` are plain configuration values.
    y_true = KeyField("y_true", default=None)
    y_score = KeyField("y_score", default=None)
    pos_label = AnyField("pos_label", default=None)
    sample_weight = KeyField("sample_weight", default=None)
    drop_intermediate = BoolField("drop_intermediate", default=True)

    @property
    def output_limit(self) -> int:
        """Number of outputs produced by this operator."""
        return 3

    @classmethod
    def _set_inputs(cls, op: "RocCurve", inputs: List[EntityData]):
        """Rebind the entity-valued fields of ``op`` to items of ``inputs``.

        After the base implementation has run, every field among ``y_true``,
        ``y_score`` and ``sample_weight`` that currently holds an entity is
        replaced, in that order, by the next item taken from ``inputs``.
        """
        super()._set_inputs(op, inputs)
        it = iter(inputs)
        for attr in ("y_true", "y_score", "sample_weight"):
            if isinstance(getattr(op, attr, None), ENTITY_TYPE):
                setattr(op, attr, next(it))

    def __call__(self, y_true, y_score, sample_weight=None):
        """Build the output tensors for the given arguments.

        Only arguments that are entities are registered as inputs. Every
        output is declared as a float tensor of shape ``(nan,)`` because its
        length is not known when the graph is built.
        """
        inputs = [
            arg for arg in (y_true, y_score, sample_weight)
            if isinstance(arg, ENTITY_TYPE)
        ]
        # Derive the output count from ``output_limit`` so the two cannot
        # drift apart.
        n_outputs = self.output_limit
        self._output_types = [OutputType.tensor] * n_outputs
        kws = [
            {"shape": (np.nan,), "dtype": np.dtype(float)}
            for _ in range(n_outputs)
        ]
        return self.new_tileables(inputs, kws=kws)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def auc(x, y, execute=False, session=None, run_kwargs=None):
    """Compute Area Under the Curve (AUC) using the trapezoidal rule

    This is a general function, given points on a curve.  For computing the
    area under the ROC-curve, see :func:`roc_auc_score`.  For an alternative
    way to summarize a precision-recall curve, see
    :func:`average_precision_score`.

    Parameters
    ----------
    x : tensor, shape = [n]
        x coordinates. These must be either monotonic increasing or monotonic
        decreasing.
    y : tensor, shape = [n]
        y coordinates.
    execute : bool, default=False
        If true, the result is executed before being returned; otherwise
        the lazy result is returned as is.
    session : optional
        Passed as ``session`` to ``execute`` when ``execute`` is true.
    run_kwargs : dict, optional
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    auc : tensor, with float value

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> from maxframe.learn import metrics
    >>> y = mt.array([1, 1, 2, 2])
    >>> pred = mt.array([0.1, 0.4, 0.35, 0.8])
    >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)
    >>> metrics.auc(fpr, tpr).execute()
    0.75

    See also
    --------
    roc_auc_score : Compute the area under the ROC curve
    average_precision_score : Compute average precision from prediction scores
    precision_recall_curve :
        Compute precision-recall pairs for different probability thresholds
    """
    x, y = check_consistent_length(x, y)
    x, y = column_or_1d(x), column_or_1d(y)

    result = AucOp(x=x, y=y)(x, y)
    if not execute:
        return result
    extra_kwargs = run_kwargs or dict()
    return result.execute(session=session, **extra_kwargs)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def roc_auc_score(
|
|
165
|
+
y_true,
|
|
166
|
+
y_score,
|
|
167
|
+
*,
|
|
168
|
+
average="macro",
|
|
169
|
+
sample_weight=None,
|
|
170
|
+
max_fpr=None,
|
|
171
|
+
multi_class="raise",
|
|
172
|
+
labels=None,
|
|
173
|
+
execute=False,
|
|
174
|
+
session=None,
|
|
175
|
+
run_kwargs=None,
|
|
176
|
+
):
|
|
177
|
+
"""
|
|
178
|
+
Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
|
|
179
|
+
from prediction scores.
|
|
180
|
+
|
|
181
|
+
Note: this implementation can be used with binary, multiclass and
|
|
182
|
+
multilabel classification, but some restrictions apply (see Parameters).
|
|
183
|
+
|
|
184
|
+
Read more in the :ref:`User Guide <roc_metrics>`.
|
|
185
|
+
|
|
186
|
+
Parameters
|
|
187
|
+
----------
|
|
188
|
+
y_true : array-like of shape (n_samples,) or (n_samples, n_classes)
|
|
189
|
+
True labels or binary label indicators. The binary and multiclass cases
|
|
190
|
+
expect labels with shape (n_samples,) while the multilabel case expects
|
|
191
|
+
binary label indicators with shape (n_samples, n_classes).
|
|
192
|
+
|
|
193
|
+
y_score : array-like of shape (n_samples,) or (n_samples, n_classes)
|
|
194
|
+
Target scores.
|
|
195
|
+
|
|
196
|
+
* In the binary case, it corresponds to an array of shape
|
|
197
|
+
`(n_samples,)`. Both probability estimates and non-thresholded
|
|
198
|
+
decision values can be provided. The probability estimates correspond
|
|
199
|
+
to the **probability of the class with the greater label**,
|
|
200
|
+
i.e. `estimator.classes_[1]` and thus
|
|
201
|
+
`estimator.predict_proba(X, y)[:, 1]`. The decision values
|
|
202
|
+
corresponds to the output of `estimator.decision_function(X, y)`.
|
|
203
|
+
See more information in the :ref:`User guide <roc_auc_binary>`;
|
|
204
|
+
* In the multiclass case, it corresponds to an array of shape
|
|
205
|
+
`(n_samples, n_classes)` of probability estimates provided by the
|
|
206
|
+
`predict_proba` method. The probability estimates **must**
|
|
207
|
+
sum to 1 across the possible classes. In addition, the order of the
|
|
208
|
+
class scores must correspond to the order of ``labels``,
|
|
209
|
+
if provided, or else to the numerical or lexicographical order of
|
|
210
|
+
the labels in ``y_true``. See more information in the
|
|
211
|
+
:ref:`User guide <roc_auc_multiclass>`;
|
|
212
|
+
* In the multilabel case, it corresponds to an array of shape
|
|
213
|
+
`(n_samples, n_classes)`. Probability estimates are provided by the
|
|
214
|
+
`predict_proba` method and the non-thresholded decision values by
|
|
215
|
+
the `decision_function` method. The probability estimates correspond
|
|
216
|
+
to the **probability of the class with the greater label for each
|
|
217
|
+
output** of the classifier. See more information in the
|
|
218
|
+
:ref:`User guide <roc_auc_multilabel>`.
|
|
219
|
+
|
|
220
|
+
average : {'micro', 'macro', 'samples', 'weighted'} or None, \
|
|
221
|
+
default='macro'
|
|
222
|
+
If ``None``, the scores for each class are returned. Otherwise,
|
|
223
|
+
this determines the type of averaging performed on the data:
|
|
224
|
+
Note: multiclass ROC AUC currently only handles the 'macro' and
|
|
225
|
+
'weighted' averages.
|
|
226
|
+
|
|
227
|
+
``'micro'``:
|
|
228
|
+
Calculate metrics globally by considering each element of the label
|
|
229
|
+
indicator matrix as a label.
|
|
230
|
+
``'macro'``:
|
|
231
|
+
Calculate metrics for each label, and find their unweighted
|
|
232
|
+
mean. This does not take label imbalance into account.
|
|
233
|
+
``'weighted'``:
|
|
234
|
+
Calculate metrics for each label, and find their average, weighted
|
|
235
|
+
by support (the number of true instances for each label).
|
|
236
|
+
``'samples'``:
|
|
237
|
+
Calculate metrics for each instance, and find their average.
|
|
238
|
+
|
|
239
|
+
Will be ignored when ``y_true`` is binary.
|
|
240
|
+
|
|
241
|
+
sample_weight : array-like of shape (n_samples,), default=None
|
|
242
|
+
Sample weights.
|
|
243
|
+
|
|
244
|
+
max_fpr : float > 0 and <= 1, default=None
|
|
245
|
+
If not ``None``, the standardized partial AUC [2]_ over the range
|
|
246
|
+
[0, max_fpr] is returned. For the multiclass case, ``max_fpr``,
|
|
247
|
+
should be either equal to ``None`` or ``1.0`` as AUC ROC partial
|
|
248
|
+
computation currently is not supported for multiclass.
|
|
249
|
+
|
|
250
|
+
multi_class : {'raise', 'ovr', 'ovo'}, default='raise'
|
|
251
|
+
Only used for multiclass targets. Determines the type of configuration
|
|
252
|
+
to use. The default value raises an error, so either
|
|
253
|
+
``'ovr'`` or ``'ovo'`` must be passed explicitly.
|
|
254
|
+
|
|
255
|
+
``'ovr'``:
|
|
256
|
+
Stands for One-vs-rest. Computes the AUC of each class
|
|
257
|
+
against the rest [3]_ [4]_. This
|
|
258
|
+
treats the multiclass case in the same way as the multilabel case.
|
|
259
|
+
Sensitive to class imbalance even when ``average == 'macro'``,
|
|
260
|
+
because class imbalance affects the composition of each of the
|
|
261
|
+
'rest' groupings.
|
|
262
|
+
``'ovo'``:
|
|
263
|
+
Stands for One-vs-one. Computes the average AUC of all
|
|
264
|
+
possible pairwise combinations of classes [5]_.
|
|
265
|
+
Insensitive to class imbalance when
|
|
266
|
+
``average == 'macro'``.
|
|
267
|
+
|
|
268
|
+
labels : array-like of shape (n_classes,), default=None
|
|
269
|
+
Only used for multiclass targets. List of labels that index the
|
|
270
|
+
classes in ``y_score``. If ``None``, the numerical or lexicographical
|
|
271
|
+
order of the labels in ``y_true`` is used.
|
|
272
|
+
|
|
273
|
+
Returns
|
|
274
|
+
-------
|
|
275
|
+
auc : float
|
|
276
|
+
|
|
277
|
+
References
|
|
278
|
+
----------
|
|
279
|
+
.. [1] `Wikipedia entry for the Receiver operating characteristic
|
|
280
|
+
<https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
|
|
281
|
+
|
|
282
|
+
.. [2] `Analyzing a portion of the ROC curve. McClish, 1989
|
|
283
|
+
<https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
|
|
284
|
+
|
|
285
|
+
.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving
|
|
286
|
+
probability estimation trees (Section 6.2), CeDER Working Paper
|
|
287
|
+
#IS-00-04, Stern School of Business, New York University.
|
|
288
|
+
|
|
289
|
+
.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern
|
|
290
|
+
Recognition Letters, 27(8), 861-874.
|
|
291
|
+
<https://www.sciencedirect.com/science/article/pii/S016786550500303X>`_
|
|
292
|
+
|
|
293
|
+
.. [5] `Hand, D.J., Till, R.J. (2001). A Simple Generalisation of the Area
|
|
294
|
+
Under the ROC Curve for Multiple Class Classification Problems.
|
|
295
|
+
Machine Learning, 45(2), 171-186.
|
|
296
|
+
<http://link.springer.com/article/10.1023/A:1010920819831>`_
|
|
297
|
+
|
|
298
|
+
See Also
|
|
299
|
+
--------
|
|
300
|
+
average_precision_score : Area under the precision-recall curve.
|
|
301
|
+
roc_curve : Compute Receiver operating characteristic (ROC) curve.
|
|
302
|
+
RocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic
|
|
303
|
+
(ROC) curve given an estimator and some data.
|
|
304
|
+
RocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic
|
|
305
|
+
(ROC) curve given the true and predicted values.
|
|
306
|
+
|
|
307
|
+
Examples
|
|
308
|
+
--------
|
|
309
|
+
Binary case:
|
|
310
|
+
|
|
311
|
+
>>> from sklearn.datasets import load_breast_cancer
|
|
312
|
+
>>> from sklearn.linear_model import LogisticRegression
|
|
313
|
+
>>> from maxframe.learn.metrics import roc_auc_score
|
|
314
|
+
>>> X, y = load_breast_cancer(return_X_y=True)
|
|
315
|
+
>>> clf = LogisticRegression(solver="liblinear", random_state=0).fit(X, y)
|
|
316
|
+
>>> roc_auc_score(y, clf.predict_proba(X)[:, 1]).execute()
|
|
317
|
+
0.99...
|
|
318
|
+
>>> roc_auc_score(y, clf.decision_function(X)).execute()
|
|
319
|
+
0.99...
|
|
320
|
+
|
|
321
|
+
Multiclass case:
|
|
322
|
+
|
|
323
|
+
>>> from sklearn.datasets import load_iris
|
|
324
|
+
>>> X, y = load_iris(return_X_y=True)
|
|
325
|
+
>>> clf = LogisticRegression(solver="liblinear").fit(X, y)
|
|
326
|
+
>>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr').execute()
|
|
327
|
+
0.99...
|
|
328
|
+
|
|
329
|
+
Multilabel case:
|
|
330
|
+
|
|
331
|
+
>>> import numpy as np
|
|
332
|
+
>>> from sklearn.datasets import make_multilabel_classification
|
|
333
|
+
>>> from sklearn.multioutput import MultiOutputClassifier
|
|
334
|
+
>>> X, y = make_multilabel_classification(random_state=0)
|
|
335
|
+
>>> clf = MultiOutputClassifier(clf).fit(X, y)
|
|
336
|
+
>>> # get a list of n_output containing probability arrays of shape
|
|
337
|
+
>>> # (n_samples, n_classes)
|
|
338
|
+
>>> y_pred = clf.predict_proba(X)
|
|
339
|
+
>>> # extract the positive columns for each output
|
|
340
|
+
>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])
|
|
341
|
+
>>> roc_auc_score(y, y_pred, average=None).execute()
|
|
342
|
+
array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])
|
|
343
|
+
>>> from sklearn.linear_model import RidgeClassifierCV
|
|
344
|
+
>>> clf = RidgeClassifierCV().fit(X, y)
|
|
345
|
+
>>> roc_auc_score(y, clf.decision_function(X), average=None).execute()
|
|
346
|
+
array([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])
|
|
347
|
+
"""
|
|
348
|
+
cache_tileables(y_true, y_score)
|
|
349
|
+
|
|
350
|
+
y_true = astensor(y_true)
|
|
351
|
+
y_score = astensor(y_score)
|
|
352
|
+
sample_weight = column_or_1d(sample_weight) if sample_weight is not None else None
|
|
353
|
+
|
|
354
|
+
op = RocAucScore(
|
|
355
|
+
y_true=y_true,
|
|
356
|
+
y_score=y_score,
|
|
357
|
+
average=average,
|
|
358
|
+
sample_weight=sample_weight,
|
|
359
|
+
max_fpr=max_fpr,
|
|
360
|
+
multi_class=multi_class,
|
|
361
|
+
labels=labels,
|
|
362
|
+
)
|
|
363
|
+
ret = op(y_true, y_score, sample_weight=sample_weight, labels=labels)
|
|
364
|
+
if execute:
|
|
365
|
+
ret = ret.execute(session=session, **(run_kwargs or dict()))
|
|
366
|
+
return ret
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def roc_curve(
    y_true,
    y_score,
    pos_label=None,
    sample_weight=None,
    drop_intermediate=True,
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute the Receiver Operating Characteristic (ROC) curve.

    Note: this implementation is restricted to the binary classification task.

    Read more in the :ref:`User Guide <roc_metrics>`.

    Parameters
    ----------

    y_true : tensor, shape = [n_samples]
        True binary labels. When the labels are neither {-1, 1} nor {0, 1},
        ``pos_label`` should be given explicitly.

    y_score : tensor, shape = [n_samples]
        Target scores: probability estimates of the positive class,
        confidence values, or a non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers).

    pos_label : int or str, default=None
        The label of the positive class.
        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
        ``pos_label`` is set to 1, otherwise an error will be raised.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    drop_intermediate : boolean, optional (default=True)
        Whether to drop some suboptimal thresholds which would not appear
        on a plotted ROC curve. Useful for producing lighter ROC curves.

    execute : bool, default=False
        When true, the result is executed and fetched before it is wrapped
        and returned; otherwise the un-executed result is wrapped and
        returned directly.

    session : optional, default=None
        Session forwarded to ``execute`` and ``fetch`` when ``execute`` is
        true.

    run_kwargs : dict, default=None
        Extra keyword arguments forwarded to ``execute`` and ``fetch`` when
        ``execute`` is true.

    Returns
    -------
    fpr : tensor, shape = [>2]
        Increasing false positive rates such that element i is the false
        positive rate of predictions with score >= thresholds[i].

    tpr : tensor, shape = [>2]
        Increasing true positive rates such that element i is the true
        positive rate of predictions with score >= thresholds[i].

    thresholds : tensor, shape = [n_thresholds]
        Decreasing thresholds on the decision function used to compute
        fpr and tpr. `thresholds[0]` represents no instances being predicted
        and is arbitrarily set to `max(y_score) + 1`.

    See also
    --------
    roc_auc_score : Compute the area under the ROC curve

    Notes
    -----
    Since the thresholds are sorted from low to high values, they
    are reversed upon returning them to ensure they correspond to both ``fpr``
    and ``tpr``, which are sorted in reversed order during their calculation.

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
           Letters, 2006, 27(8):861-874.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> from maxframe.learn import metrics
    >>> y = mt.array([1, 1, 2, 2])
    >>> scores = mt.array([0.1, 0.4, 0.35, 0.8])
    >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)
    >>> fpr
    array([0. , 0. , 0.5, 0.5, 1. ])
    >>> tpr
    array([0. , 0.5, 0.5, 1. , 1. ])
    >>> thresholds
    array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])

    """
    cache_tileables(y_true, y_score)

    # Pass the inputs through column_or_1d in a fixed order: labels, scores,
    # then the optional weights.
    y_true = column_or_1d(y_true)
    y_score = column_or_1d(y_score)
    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)

    curve_op = RocCurve(
        y_true=y_true,
        y_score=y_score,
        pos_label=pos_label,
        sample_weight=sample_weight,
        drop_intermediate=drop_intermediate,
    )
    curve = curve_op(y_true, y_score, sample_weight)

    if not execute:
        return ExecutableTuple(curve)

    # The same session and run kwargs are used for both execute and fetch.
    extra_kwargs = run_kwargs or dict()
    executed = ExecutableTuple(curve).execute(session=session, **extra_kwargs)
    fetched = executed.fetch(session=session, **extra_kwargs)
    return ExecutableTuple(fetched)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Callable, Union
|
|
16
|
+
|
|
17
|
+
from . import accuracy_score, log_loss, r2_score
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from sklearn.metrics import make_scorer
|
|
21
|
+
except ImportError:
|
|
22
|
+
make_scorer = lambda *_, **__: None
|
|
23
|
+
|
|
24
|
+
def _make_proba_scorer(score_func, **kwargs):
    """Build a scorer that should be fed probability estimates.

    Older scikit-learn releases select probability input with
    ``needs_proba=True``; newer releases replace that flag with
    ``response_method="predict_proba"``. Where ``needs_proba`` is no longer
    a named parameter it would be absorbed into ``**kwargs`` and forwarded
    to the metric, so the keyword is chosen by inspecting the signature of
    whatever ``make_scorer`` is in scope.
    """
    import inspect

    try:
        accepted = inspect.signature(make_scorer).parameters
    except (TypeError, ValueError):
        # Signature not introspectable: keep the historical keyword.
        accepted = {}

    if "response_method" in accepted:
        return make_scorer(score_func, response_method="predict_proba", **kwargs)
    return make_scorer(score_func, needs_proba=True, **kwargs)


# NOTE: these assignments intentionally keep their historical names, which
# rebind the imported metric functions to their scorer wrappers.
accuracy_score = make_scorer(accuracy_score)
r2_score = make_scorer(r2_score)
neg_log_loss_scorer = _make_proba_scorer(log_loss, greater_is_better=False)


# Registry of predefined scorers, looked up by name.
SCORERS = dict(
    r2=r2_score,
    accuracy=accuracy_score,
    neg_log_loss=neg_log_loss_scorer,
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_scorer(score_func: Union[str, Callable], **kwargs) -> Callable:
    """
    Resolve a scorer from a name, or build one from a callable.

    Parameters
    ----------
    score_func : str | callable
        Either the name of a predefined scorer (a key of ``SCORERS``) or a
        callable. A callable is wrapped with ``make_scorer``; it is not
        returned unchanged.
    **kwargs
        Extra keyword arguments forwarded to ``make_scorer`` when
        ``score_func`` is a callable. Ignored when ``score_func`` is a
        string.

    Returns
    -------
    scorer : callable
        The scorer.

    Raises
    ------
    ValueError
        If ``score_func`` is a string that is not a key of ``SCORERS``.
    """
    if not isinstance(score_func, str):
        return make_scorer(score_func, **kwargs)

    try:
        return SCORERS[score_func]
    except KeyError:
        # The KeyError adds nothing to the message below, so suppress the
        # implicit exception chaining.
        raise ValueError(
            "{} is not a valid scoring value. "
            "Valid options are {}".format(score_func, sorted(SCORERS))
        ) from None
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .cosine import cosine_distances, cosine_similarity
|
|
16
|
+
from .euclidean import euclidean_distances
|
|
17
|
+
from .haversine import haversine_distances
|
|
18
|
+
from .manhattan import manhattan_distances
|
|
19
|
+
from .pairwise import PAIRWISE_DISTANCE_FUNCTIONS, pairwise_distances
|
|
20
|
+
from .pairwise_distances_topk import pairwise_distances_topk
|
|
21
|
+
from .rbf_kernel import rbf_kernel
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from ....serialization.serializables import Int64Field
|
|
18
|
+
from ....tensor import tensor as astensor
|
|
19
|
+
from ....tensor.operators import TensorOperator, TensorOperatorMixin
|
|
20
|
+
from ...utils.validation import check_array
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PairwiseDistances(TensorOperator, TensorOperatorMixin):
    """Tensor operator carrying input-validation helpers for pairwise metrics."""

    _op_module_ = "learn"

    # Serialized Int64 field.
    # NOTE(review): presumably a size limit for stored chunks - confirm.
    chunk_store_limit = Int64Field("chunk_store_limit")

    @staticmethod
    def _return_float_dtype(X, Y):
        """
        Convert the inputs to tensors and pick the floating dtype to use.

        1. If dtype of X and Y is float32, then dtype float32 is returned.
        2. Else dtype float is returned.

        ``Y`` may be ``None``; it is then returned unchanged and the dtype of
        ``X`` stands in for it in the comparison.
        """
        X = astensor(X)
        if Y is not None:
            Y = astensor(Y)

        other_dtype = X.dtype if Y is None else Y.dtype
        both_float32 = X.dtype == other_dtype == np.float32
        return X, Y, (np.float32 if both_float32 else float)

    @staticmethod
    def check_pairwise_arrays(X, Y, precomputed=False, dtype=None):
        """
        Validate a pair of inputs for a pairwise computation.

        Both inputs are passed through ``check_array`` using ``dtype``, or the
        dtype chosen by ``_return_float_dtype`` when ``dtype`` is ``None``.
        When ``Y`` is ``None`` or the same object as ``X``, a single validated
        array is returned for both.

        Raises
        ------
        ValueError
            If ``precomputed`` is true and ``X.shape[1] != Y.shape[0]``, or if
            ``precomputed`` is false and ``X.shape[1] != Y.shape[1]``.
        """
        X, Y, inferred_dtype = PairwiseDistances._return_float_dtype(X, Y)

        # Keyword arguments shared by every check_array call below.
        check_kwargs = dict(
            accept_sparse=True,
            dtype=inferred_dtype if dtype is None else dtype,
            estimator="check_pairwise_arrays",
        )

        if Y is None or Y is X:
            X = Y = check_array(X, **check_kwargs)
        else:
            X = check_array(X, **check_kwargs)
            Y = check_array(Y, **check_kwargs)

        if precomputed and X.shape[1] != Y.shape[0]:
            raise ValueError(
                "Precomputed metric requires shape "
                f"(n_queries, n_indexed). Got ({X.shape[0]}, {X.shape[1]}) "
                f"for {Y.shape[0]} indexed."
            )
        if not precomputed and X.shape[1] != Y.shape[1]:
            raise ValueError(
                "Incompatible dimension for X and Y matrices: "
                f"X.shape[1] == {X.shape[1]} while Y.shape[1] == {Y.shape[1]}"
            )

        return X, Y
|