PyPI - maxframe - Versions diffs - 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl - Mend

maxframe 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391)

maxframe/__init__.py +1 -0
maxframe/_utils.cp38-win_amd64.pyd +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/learn/contrib/llm/core.py CHANGED Viewed

@@ -23,6 +23,7 @@ from ....core.operator.core import TileableOperatorMixin
 from ....dataframe.core import SERIES_TYPE
 from ....dataframe.operators import DataFrameOperatorMixin
 from ....dataframe.utils import parse_index
+from ....serialization.serializables import Int32Field
 from ....serialization.serializables.core import Serializable
 from ....serialization.serializables.field import AnyField, DictField, StringField
@@ -39,6 +40,7 @@ class LLMTaskOperator(Operator, DataFrameOperatorMixin):
     model = AnyField("model", default=None)
     params = DictField("params", default=None)
     running_options: Dict[str, Any] = DictField("running_options", default=None)
+    timeout = Int32Field("timeout", default=None)
     def __init__(self, output_types=None, **kw):
         if output_types is None:

maxframe/learn/contrib/xgboost/core.py CHANGED Viewed

@@ -20,6 +20,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 from ....errors import TileableNotExecutedError
+from ....udf import builtin_function
 try:
     import xgboost
@@ -27,9 +28,16 @@ except ImportError:
     xgboost = None
 from ....core import OutputType
+from ...utils.odpsio import ToODPSModelMixin
 from ..models import ModelApplyChunk, ModelWithEval, ModelWithEvalData, to_remote_model
 from .dmatrix import DMatrix
+_xgb_type_to_np_type = {
+    "float": "float32",
+    "int": "int32",
+    "i": "bool",
+}
 class BoosterData(ModelWithEvalData):
     @staticmethod
@@ -88,6 +96,58 @@ class BoosterData(ModelWithEvalData):
             strict_shape=strict_shape,
         )
+    @staticmethod
+    @builtin_function
+    def _get_training_info(bst, evals_result, local_info):
+        model_infos = {
+            "iteration": bst.num_boosted_rounds(),
+        }
+        if evals_result:
+            model_infos.update(
+                dict(
+                    duration_ms=evals_result.get("duration_ms"),
+                )
+            )
+        if bst.feature_names:
+            model_infos["feature_names"] = bst.feature_names
+            model_infos["feature_types"] = [
+                _xgb_type_to_np_type[x] for x in bst.feature_types
+            ]
+        model_infos.update(local_info or {})
+        try:
+            config = json.loads(bst.save_config())
+            stack = [config]
+            internal = {}
+            while stack:
+                obj = stack.pop()
+                for k, v in obj.items():
+                    if k.endswith("_param"):
+                        for p_k, p_v in v.items():
+                            internal[p_k] = p_v
+                    elif isinstance(v, dict):
+                        stack.append(v)
+            for k, v in internal.items():
+                for t in (int, float, str):
+                    try:
+                        model_infos[k] = t(v)
+                        break
+                    except ValueError:
+                        continue
+        except ValueError:
+            pass
+        return model_infos
+    def get_training_info(self, evals_result: dict = None, local_info: dict = None):
+        evals_result = getattr(self, "_evals_result", None) or evals_result
+        args = (evals_result, local_info)
+        op = ModelApplyChunk(
+            func=self._get_training_info, output_types=[OutputType.object]
+        )
+        return op(self, [{}], args=args)[0]
 class Booster(ModelWithEval):
     pass
@@ -97,7 +157,7 @@ if not xgboost:
     XGBScikitLearnBase = None
 else:
-    class XGBScikitLearnBase(xgboost.XGBModel):
+    class XGBScikitLearnBase(xgboost.XGBModel, ToODPSModelMixin):
         """
         Base class for implementing scikit-learn interface
         """
@@ -181,6 +241,7 @@ else:
                 **train_kw,
             )
             self._Booster = result
+            self.evals_result_t_ = result.op.outputs[-1]
             return self
         def predict(self, data, **kw):
@@ -276,6 +337,30 @@ else:
                 n_features=self._n_features_in,
             )[0]
+        @property
+        def training_info_(self):
+            local_info = {}
+            attrs = [
+                "n_classes_",
+                "learning_rate",
+            ]
+            for attr in attrs:
+                if getattr(self, attr, None):
+                    local_info[attr] = getattr(self, attr)
+            return self._Booster.get_training_info(
+                evals_result=self.evals_result_t_, local_info=local_info
+            )
+        def _get_odps_model_info(self) -> ToODPSModelMixin.ODPSModelInfo:
+            model_format = (
+                "BOOSTED_TREE_CLASSIFIER"
+                if hasattr(self, "predict_proba")
+                else "BOOSTED_TREE_REGRESSOR"
+            )
+            return ToODPSModelMixin.ODPSModelInfo(
+                model_format=model_format, model_params=self._Booster
+            )
     def wrap_evaluation_matrices(
         missing: float,
         X: Any,

maxframe/learn/contrib/xgboost/train.py CHANGED Viewed

@@ -50,7 +50,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
     dtrain = KeyField("dtrain", default=None)
     evals = ListField("evals", on_serialize=_on_serialize_evals, default=None)
     obj = FunctionField("obj", default=None)
-    feval = FunctionField("obj", default=None)
+    feval = FunctionField("feval", default=None)
     maximize = BoolField("maximize", default=None)
     early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
     verbose_eval = AnyField("verbose_eval", default=None)
@@ -64,8 +64,11 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
     custom_metric = FunctionField("custom_metric", default=None)
     num_boost_round = Int64Field("num_boost_round", default=10)
     num_class = Int64Field("num_class", default=None)
+    _has_evals_result = BoolField("has_evals_result", default=False)
     def __init__(self, gpu=None, **kw):
+        if kw.get("evals_result") is not None:
+            kw["_has_evals_result"] = True
         super().__init__(gpu=gpu, **kw)
         if self.output_types is None:
             self.output_types = [OutputType.object]
@@ -110,7 +113,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
     @property
     def has_evals_result(self) -> bool:
-        return self.evals
+        return self._has_evals_result or self.evals
 def _get_xgb_booster(xgb_model):

maxframe/learn/core.py CHANGED Viewed

@@ -222,6 +222,41 @@ class TransformerMixin:
             return self.fit(X, y, **fit_params).transform(X)
+class ClassifierMixin:
+    """Mixin class for all classifiers in scikit-learn."""
+    _estimator_type = "classifier"
+    def score(self, X, y, sample_weight=None):
+        """
+        Return the mean accuracy on the given test data and labels.
+        In multi-label classification, this is the subset accuracy
+        which is a harsh metric since you require for each sample that
+        each label set be correctly predicted.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            True labels for X.
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+        Returns
+        -------
+        score : Tensor
+            Mean accuracy of self.predict(X) wrt. y.
+        """
+        from .metrics import accuracy_score
+        result = accuracy_score(y, self.predict(X), sample_weight=sample_weight)
+        return result
 class RegressorMixin:
     """Mixin class for all regression estimators in scikit-learn."""
@@ -276,3 +311,34 @@ class RegressorMixin:
     def _more_tags(self):  # noqa: R0201  # pylint: disable=no-self-use
         return {"requires_y": True}
+class ClusterMixin:
+    """Mixin class for all cluster estimators in scikit-learn."""
+    _estimator_type = "clusterer"
+    def fit_predict(self, X, y=None):
+        """
+        Perform clustering on `X` and returns cluster labels.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        labels : ndarray of shape (n_samples,), dtype=np.int64
+            Cluster labels.
+        """
+        # non-optimized default implementation; override when a better
+        # method is possible for a given clustering algorithm
+        self.fit(X)
+        return self.labels_
+    def _more_tags(self):
+        return {"preserves_dtype": []}

maxframe/learn/linear_model/_base.py CHANGED Viewed

@@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod
 from ... import tensor as mt
 from ...tensor.datasource import tensor as astensor
-from ..core import BaseEstimator
+from ..core import BaseEstimator, ClassifierMixin
 from ..preprocessing import normalize as f_normalize
 from ..utils.validation import FLOAT_DTYPES, check_array
@@ -161,3 +161,60 @@ class LinearModel(BaseEstimator, metaclass=ABCMeta):
     def _more_tags(self):  # noqa: R0201  # pylint: disable=no-self-use
         return {"requires_y": True}
+class LinearClassifierMixin(ClassifierMixin):
+    """Mixin for linear classifiers.
+    Handles prediction for sparse and dense X.
+    """
+    def decision_function(self, X):
+        """
+        Predict confidence scores for samples.
+        The confidence score for a sample is proportional to the signed
+        distance of that sample to the hyperplane.
+        Parameters
+        ----------
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            Samples.
+        Returns
+        -------
+        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
+            Confidence scores per (sample, class) combination. In the binary
+            case, confidence score for self.classes_[1] where >0 means this
+            class would be predicted.
+        """
+        check_is_fitted(self)
+        X = check_array(X, accept_sparse="csr")
+        n_features = self.coef_.shape[1]
+        if X.shape[1] != n_features:
+            raise ValueError(
+                "X has %d features per sample; expecting %d" % (X.shape[1], n_features)
+            )
+        scores = mt.dot(X, self.coef_.T) + self.intercept_
+        return scores
+    def predict(self, X):
+        """
+        Predict class labels for samples in X.
+        Parameters
+        ----------
+        X : array-like or sparse matrix, shape (n_samples, n_features)
+            Samples.
+        Returns
+        -------
+        C : array, shape [n_samples]
+            Predicted class label per sample.
+        """
+        scores = self.decision_function(X)
+        indices = scores.argmax(axis=1)
+        return self.classes_[indices].execute()

maxframe/learn/linear_model/_lin_reg.py CHANGED Viewed

@@ -22,7 +22,7 @@ from ._base import LinearModel, _rescale_data
 try:
     from sklearn.base import MultiOutputMixin
 except ImportError:
-    MultiOutputMixin = object
+    MultiOutputMixin = type("MultiOutputMixin", (object,), {})
 class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):

maxframe/learn/metrics/__init__.py CHANGED Viewed

@@ -17,9 +17,15 @@ from ._classification import (
     accuracy_score,
     f1_score,
     fbeta_score,
+    log_loss,
     multilabel_confusion_matrix,
     precision_recall_fscore_support,
     precision_score,
     recall_score,
 )
+from ._ranking import auc, roc_auc_score, roc_curve
 from ._regression import r2_score
+from .pairwise import pairwise_distances
+# isort: off
+from ._scorer import get_scorer

maxframe/learn/metrics/_classification.py CHANGED Viewed

@@ -33,6 +33,7 @@ from ...serialization.serializables import (
 from ...tensor.core import TensorOrder
 from ...typing_ import EntityType
 from ..core import LearnOperatorMixin
+from ..utils import check_array, check_consistent_length
 from ._check_targets import _check_targets
@@ -163,6 +164,150 @@ def accuracy_score(
     return score.execute(session=session, **(run_kwargs or dict()))
+class LogLoss(Operator, LearnOperatorMixin):
+    _op_type_ = opcodes.LOG_LOSS
+    y_true = AnyField("y_true")
+    y_pred = AnyField("y_pred")
+    eps = Float64Field("eps", default=1e-15)
+    normalize = BoolField("normalize", default=True)
+    sample_weight = AnyField("sample_weight", default=None)
+    labels = AnyField("labels", default=None)
+    @classmethod
+    def _set_inputs(cls, op: "LogLoss", inputs: List[EntityType]):
+        super()._set_inputs(op, inputs)
+        inputs_iter = iter(op.inputs)
+        op.y_true = next(inputs_iter)
+        op.y_pred = next(inputs_iter)
+        if isinstance(op.sample_weight, ENTITY_TYPE):
+            op.sample_weight = next(inputs_iter)
+        if isinstance(op.labels, ENTITY_TYPE):
+            op.labels = next(inputs_iter)
+    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
+        self._output_types = [OutputType.tensor]
+        self.sample_weight = sample_weight
+        self.labels = labels
+        inputs = [y_true, y_pred]
+        if isinstance(self.sample_weight, ENTITY_TYPE):
+            inputs.append(self.sample_weight)
+        if isinstance(self.labels, ENTITY_TYPE):
+            inputs.append(self.labels)
+        dtype = (
+            np.dtype(float)
+            if self.normalize
+            else np.result_type(y_true.dtype, y_pred.dtype)
+        )
+        return self.new_tileable(
+            inputs, dtype=dtype, shape=(), order=TensorOrder.C_ORDER
+        )
+def log_loss(
+    y_true,
+    y_pred,
+    *,
+    eps=1e-15,
+    normalize=True,
+    sample_weight=None,
+    labels=None,
+    execute=False,
+    session=None,
+    run_kwargs=None,
+):
+    r"""Log loss, aka logistic loss or cross-entropy loss.
+    This is the loss function used in (multinomial) logistic regression
+    and extensions of it such as neural networks, defined as the negative
+    log-likelihood of a logistic model that returns ``y_pred`` probabilities
+    for its training data ``y_true``.
+    The log loss is only defined for two or more labels.
+    For a single sample with true label :math:`y \in \{0,1\}` and
+    and a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, the log
+    loss is:
+    .. math::
+        L_{\log}(y, p) = -(y \log (p) + (1 - y) \log (1 - p))
+    Read more in the :ref:`User Guide <log_loss>`.
+    Parameters
+    ----------
+    y_true : array-like or label indicator matrix
+        Ground truth (correct) labels for n_samples samples.
+    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
+        Predicted probabilities, as returned by a classifier's
+        predict_proba method. If ``y_pred.shape = (n_samples,)``
+        the probabilities provided are assumed to be that of the
+        positive class. The labels in ``y_pred`` are assumed to be
+        ordered alphabetically, as done by
+        :class:`preprocessing.LabelBinarizer`.
+    eps : float, default=1e-15
+        Log loss is undefined for p=0 or p=1, so probabilities are
+        clipped to max(eps, min(1 - eps, p)).
+    normalize : bool, default=True
+        If true, return the mean loss per sample.
+        Otherwise, return the sum of the per-sample losses.
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+    labels : array-like, default=None
+        If not provided, labels will be inferred from y_true. If ``labels``
+        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
+        assumed to be binary and are inferred from ``y_true``.
+    Returns
+    -------
+    loss : float
+    Notes
+    -----
+    The logarithm used is the natural logarithm (base-e).
+    Examples
+    --------
+    >>> from maxframe.learn.metrics import log_loss
+    >>> log_loss(["spam", "ham", "ham", "spam"],
+    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
+    0.21616...
+    References
+    ----------
+    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
+    p. 209.
+    """
+    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
+        y_true = mt.array(y_true)
+    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
+        y_pred = mt.array(y_pred)
+    if sample_weight is not None and not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
+        sample_weight = mt.array(sample_weight)
+    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
+        labels = mt.array(labels)
+    y_pred = check_array(y_pred, ensure_2d=False)
+    y_pred, y_true, sample_weight = check_consistent_length(
+        y_pred, y_true, sample_weight
+    )
+    op = LogLoss(eps=eps, normalize=normalize)
+    res = op(
+        y_true=y_true,
+        y_pred=y_pred,
+        sample_weight=sample_weight,
+        labels=labels,
+    )
+    if execute:
+        return res.execute(session=session, **(run_kwargs or {}))
+    return res
 class MultiLabelConfusionMatrix(Operator, LearnOperatorMixin):
     _op_type_ = opcodes.MULTILABEL_CONFUSION_MATRIX