maxframe-2.2.0-cp38-cp38-win_amd64.whl → maxframe-2.3.0rc1-cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (114)
  1. maxframe/_utils.cp38-win_amd64.pyd +0 -0
  2. maxframe/codegen/core.py +3 -2
  3. maxframe/codegen/spe/dataframe/merge.py +4 -0
  4. maxframe/codegen/spe/dataframe/misc.py +2 -0
  5. maxframe/codegen/spe/dataframe/reduction.py +18 -0
  6. maxframe/codegen/spe/dataframe/sort.py +9 -1
  7. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  8. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  9. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  10. maxframe/codegen/spe/tensor/datasource.py +1 -0
  11. maxframe/config/config.py +3 -0
  12. maxframe/conftest.py +10 -0
  13. maxframe/core/base.py +2 -1
  14. maxframe/core/entity/tileables.py +2 -0
  15. maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
  16. maxframe/core/graph/entity.py +7 -1
  17. maxframe/core/mode.py +6 -1
  18. maxframe/dataframe/__init__.py +2 -2
  19. maxframe/dataframe/arithmetic/__init__.py +4 -0
  20. maxframe/dataframe/arithmetic/maximum.py +33 -0
  21. maxframe/dataframe/arithmetic/minimum.py +33 -0
  22. maxframe/dataframe/core.py +98 -106
  23. maxframe/dataframe/datasource/core.py +6 -0
  24. maxframe/dataframe/datasource/direct.py +57 -0
  25. maxframe/dataframe/datasource/read_csv.py +19 -11
  26. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  27. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  28. maxframe/dataframe/datasource/read_parquet.py +38 -39
  29. maxframe/dataframe/datastore/__init__.py +6 -0
  30. maxframe/dataframe/datastore/direct.py +268 -0
  31. maxframe/dataframe/datastore/to_odps.py +6 -0
  32. maxframe/dataframe/extensions/flatjson.py +2 -1
  33. maxframe/dataframe/groupby/__init__.py +5 -1
  34. maxframe/dataframe/groupby/aggregation.py +10 -6
  35. maxframe/dataframe/groupby/apply_chunk.py +1 -3
  36. maxframe/dataframe/groupby/core.py +20 -4
  37. maxframe/dataframe/indexing/__init__.py +2 -1
  38. maxframe/dataframe/indexing/insert.py +45 -17
  39. maxframe/dataframe/merge/__init__.py +3 -0
  40. maxframe/dataframe/merge/combine.py +244 -0
  41. maxframe/dataframe/misc/__init__.py +14 -3
  42. maxframe/dataframe/misc/check_unique.py +41 -10
  43. maxframe/dataframe/misc/drop.py +31 -0
  44. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  45. maxframe/dataframe/misc/map.py +31 -18
  46. maxframe/dataframe/misc/repeat.py +159 -0
  47. maxframe/dataframe/misc/tests/test_misc.py +35 -1
  48. maxframe/dataframe/missing/checkna.py +3 -2
  49. maxframe/dataframe/reduction/__init__.py +10 -5
  50. maxframe/dataframe/reduction/aggregation.py +6 -6
  51. maxframe/dataframe/reduction/argmax.py +7 -4
  52. maxframe/dataframe/reduction/argmin.py +7 -4
  53. maxframe/dataframe/reduction/core.py +18 -9
  54. maxframe/dataframe/reduction/mode.py +144 -0
  55. maxframe/dataframe/reduction/nunique.py +10 -3
  56. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  57. maxframe/dataframe/sort/__init__.py +9 -2
  58. maxframe/dataframe/sort/argsort.py +7 -1
  59. maxframe/dataframe/sort/core.py +1 -1
  60. maxframe/dataframe/sort/rank.py +147 -0
  61. maxframe/dataframe/tseries/__init__.py +19 -0
  62. maxframe/dataframe/tseries/at_time.py +61 -0
  63. maxframe/dataframe/tseries/between_time.py +122 -0
  64. maxframe/dataframe/utils.py +30 -26
  65. maxframe/learn/contrib/llm/core.py +16 -7
  66. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  67. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  68. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  69. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  70. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  71. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  73. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  74. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  75. maxframe/learn/contrib/llm/models/managed.py +76 -11
  76. maxframe/learn/contrib/llm/models/openai.py +72 -0
  77. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  78. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  79. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  80. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  81. maxframe/learn/contrib/llm/text.py +348 -42
  82. maxframe/learn/contrib/models.py +4 -1
  83. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  84. maxframe/learn/contrib/xgboost/core.py +31 -7
  85. maxframe/learn/contrib/xgboost/predict.py +4 -2
  86. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  87. maxframe/learn/contrib/xgboost/train.py +2 -0
  88. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  89. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  90. maxframe/learn/utils/__init__.py +1 -0
  91. maxframe/learn/utils/extmath.py +42 -9
  92. maxframe/learn/utils/odpsio.py +80 -11
  93. maxframe/lib/filesystem/_oss_lib/common.py +2 -0
  94. maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
  95. maxframe/opcodes.py +9 -1
  96. maxframe/remote/core.py +4 -0
  97. maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
  98. maxframe/serialization/tests/test_serial.py +2 -2
  99. maxframe/tensor/arithmetic/__init__.py +1 -1
  100. maxframe/tensor/arithmetic/core.py +2 -2
  101. maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
  102. maxframe/tensor/core.py +3 -0
  103. maxframe/tensor/misc/copyto.py +1 -1
  104. maxframe/tests/test_udf.py +61 -0
  105. maxframe/tests/test_utils.py +8 -5
  106. maxframe/udf.py +103 -7
  107. maxframe/utils.py +61 -8
  108. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
  109. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
  110. maxframe_client/session/task.py +8 -1
  111. maxframe_client/tests/test_session.py +24 -0
  112. maxframe/dataframe/arrays.py +0 -864
  113. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  114. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/regressor.py CHANGED
@@ -14,6 +14,7 @@
 
 from typing import Union
 
+from ...utils.odpsio import register_odps_model
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
 
@@ -24,6 +25,7 @@ else:
 
     from .predict import predict
 
+    @register_odps_model
     class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
         """
         Implementation of the scikit-learn API for XGBoost regressor.
@@ -69,6 +71,9 @@ else:
             A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
             of group weights on the i-th validation set.
             """
+            if y.ndim == 2:
+                kw["num_class"] = y.shape[1]
+                kw["output_ndim"] = 2
             super().fit(
                 X,
                 y,
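Note: the new `y.ndim == 2` branch above is what lets a multi-column target reach the trainer. A hypothetical usage sketch, not taken from this diff — the import paths mirror Mars conventions and a configured MaxFrame session is assumed:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.contrib.xgboost import XGBRegressor

X = md.DataFrame(pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}))
# a two-column target frame has ndim == 2, so fit() forwards
# num_class=2 and output_ndim=2 to the underlying XGBTrain operator
y = md.DataFrame(pd.DataFrame({"t1": [0.1, 0.2, 0.3], "t2": [1.0, 2.0, 3.0]}))

reg = XGBRegressor(n_estimators=5)
reg.fit(X, y)
pred = reg.predict(X).execute()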
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -25,6 +25,7 @@ from ....serialization.serializables import (
     DictField,
     FieldTypes,
     FunctionField,
+    Int16Field,
     Int64Field,
     KeyField,
     ListField,
@@ -65,6 +66,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
    num_boost_round = Int64Field("num_boost_round", default=10)
    num_class = Int64Field("num_class", default=None)
    _has_evals_result = BoolField("has_evals_result", default=False)
+    output_ndim = Int16Field("output_ndim", default=None)
 
    def __init__(self, gpu=None, **kw):
        if kw.get("evals_result") is not None:
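`Int16Field` is one of the serializable field descriptors, so the new `output_ndim` attribute round-trips with the operator like its siblings. A minimal sketch, assuming the `Serializable` base accepts field keyword arguments as it does in Mars:

from maxframe.serialization.serializables import Int16Field, Serializable

class _Demo(Serializable):
    # declared exactly like output_ndim on XGBTrain above
    output_ndim = Int16Field("output_ndim", default=None)

d = _Demo(output_ndim=2)
assert d.output_ndim == 2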
maxframe/learn/preprocessing/_data/min_max_scaler.py CHANGED
@@ -106,10 +106,11 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
     """
 
-    def __init__(self, feature_range=(0, 1), copy=True, clip=False):
+    def __init__(self, feature_range=(0, 1), copy=True, clip=False, validate=True):
         self.feature_range = feature_range
         self.copy = copy
         self.clip = clip
+        self.validate = validate
 
     def _reset(self):  # pragma: no cover
         """Reset internal data-dependent state of the scaler, if necessary.
@@ -186,13 +187,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
         )
 
         first_pass = not hasattr(self, "n_samples_seen_")
-        X = self._validate_data(
-            X,
-            reset=first_pass,
-            estimator=self,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                reset=first_pass,
+                estimator=self,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+            )
 
         if isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE)):
             data_min = X.min(axis=0)
@@ -239,13 +241,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
         """
         check_is_fitted(self)
 
-        X = self._validate_data(
-            X,
-            copy=self.copy,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-            reset=False,
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                copy=self.copy,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+                reset=False,
+            )
 
         X *= self.scale_
         X += self.min_
@@ -290,6 +293,7 @@ def minmax_scale(
     *,
     axis=0,
     copy=True,
+    validate=True,
     execute=False,
     session=None,
     run_kwargs=None
@@ -368,21 +372,28 @@ def minmax_scale(
     """  # noqa
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
-    X = check_array(
-        X, copy=False, ensure_2d=False, dtype=FLOAT_DTYPES, force_all_finite="allow-nan"
-    )
-    original_ndim = X.ndim
+    if validate:
+        X = check_array(
+            X,
+            copy=False,
+            ensure_2d=False,
+            dtype=FLOAT_DTYPES,
+            force_all_finite="allow-nan",
+        )
+        original_ndim = X.ndim
 
-    if original_ndim == 1:
-        X = X.reshape(X.shape[0], 1)
+        if original_ndim == 1:
+            X = X.reshape(X.shape[0], 1)
+    else:
+        original_ndim = X.ndim
 
-    s = MinMaxScaler(feature_range=feature_range, copy=copy)
+    s = MinMaxScaler(feature_range=feature_range, copy=copy, validate=validate)
     if axis == 0:
         X = s.fit_transform(X)
     else:
         X = s.fit_transform(X.T).T
 
-    if original_ndim == 1:
+    if validate and original_ndim == 1:
         X = X.ravel()
 
     if not execute:
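Taken together, these hunks thread a `validate` switch through `MinMaxScaler` and `minmax_scale` so validation (and the ndarray coercion it implies) can be skipped, letting the `isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE))` branch handle frame inputs directly. A hypothetical sketch — the import path and session setup are assumptions:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.preprocessing import minmax_scale

df = md.DataFrame(pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 30.0]}))
# validate=False bypasses check_array/_validate_data, so the frame is
# scaled via X.min(axis=0)/X.max(axis=0) without conversion to a tensor
scaled = minmax_scale(df, validate=False)
print(scaled.execute())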
maxframe/learn/preprocessing/_data/standard_scaler.py CHANGED
@@ -156,10 +156,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
     [[3. 3.]]
     """
 
-    def __init__(self, *, copy=True, with_mean=True, with_std=True):
+    def __init__(self, *, copy=True, with_mean=True, with_std=True, validate=True):
         self.with_mean = with_mean
         self.with_std = with_std
         self.copy = copy
+        self.validate = validate
 
     def _reset(self):
         """Reset internal data-dependent state of the scaler, if necessary.
@@ -246,14 +247,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         Fitted scaler.
         """
         first_call = not hasattr(self, "n_samples_seen_")
-        X = self._validate_data(
-            X,
-            accept_sparse=("csr", "csc"),
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-            reset=first_call,
-        )
-        n_features = X.shape[1]
+        if self.validate:
+            X = self._validate_data(
+                X,
+                accept_sparse=("csr", "csc"),
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+                reset=first_call,
+            )
+        n_features = X.shape[1] if X.ndim == 2 else 1
 
         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
@@ -267,7 +269,9 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         # incr_mean_variance_axis and _incremental_variance_axis
         dtype = np.int64 if sample_weight is None else X.dtype
         if not hasattr(self, "n_samples_seen_"):
-            self.n_samples_seen_ = mt.zeros(n_features, dtype=dtype)
+            self.n_samples_seen_ = (
+                mt.zeros(n_features, dtype=dtype) if X.ndim == 2 else 0
+            )
         # elif np.size(self.n_samples_seen_) == 1:
         #     self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])
         #     self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)
@@ -309,9 +313,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
             constant_mask = _is_constant_feature(
                 self.var_, self.mean_, self.n_samples_seen_
             )
-            self.scale_ = _handle_zeros_in_scale(
-                mt.sqrt(self.var_), copy=False, constant_mask=constant_mask
-            )
+            self.scale_ = mt.sqrt(self.var_)
+            if self.validate:
+                self.scale_ = _handle_zeros_in_scale(
+                    self.scale_, copy=False, constant_mask=constant_mask
+                )
         else:
             self.scale_ = None
 
@@ -337,14 +343,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         check_is_fitted(self)
 
         copy = copy if copy is not None else self.copy
-        X = self._validate_data(
-            X,
-            reset=False,
-            accept_sparse="csr",
-            copy=copy,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                reset=False,
+                accept_sparse="csr",
+                copy=copy,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+            )
 
         if sparse.issparse(X):
             raise NotImplementedError("Scaling on sparse tensors is not supported")
@@ -397,7 +404,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         return X
 
 
-def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
+def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True, validate=True):
     """Standardize a dataset along any axis.
 
     Center to the mean and component wise scale to unit variance.
@@ -488,16 +495,18 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
         X = mt.tensor(X)
 
     ndim = X.ndim
-    if ndim == 1:
+    if validate and ndim == 1:
         X = X.reshape((X.shape[0], 1))
     if axis == 1:
         X = X.T
 
-    scaler = StandardScaler(with_mean=with_mean, with_std=with_std, copy=copy)
+    scaler = StandardScaler(
+        with_mean=with_mean, with_std=with_std, copy=copy, validate=validate
+    )
    transformed = scaler.fit_transform(X)
 
    if axis == 1:
        transformed = transformed.T
-    if ndim == 1:
+    if validate and ndim == 1:
        transformed = transformed.reshape(transformed.shape[0])
    return transformed
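`StandardScaler` gets the same `validate` switch, plus tolerance for non-2-D input (`n_features` falls back to 1 and `n_samples_seen_` to a plain 0 when `X.ndim != 2`). A hypothetical sketch along the same lines — whether a 1-D Series flows end to end depends on the frame branches added in extmath.py below, so treat this as illustrative only:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.preprocessing import StandardScaler

s = md.Series(pd.Series([1.0, 2.0, 3.0, 4.0]))
scaler = StandardScaler(validate=False)  # skip _validate_data entirely
standardized = scaler.fit_transform(s)   # 1-D input stays 1-D
print(standardized.execute())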
maxframe/learn/utils/__init__.py CHANGED
@@ -14,6 +14,7 @@
 
 from .core import convert_to_tensor_or_dataframe
 from .multiclass import check_classification_targets
+from .odpsio import read_odps_model
 from .shuffle import shuffle
 from .sparsefuncs import count_nonzero
 from .validation import check_array, check_consistent_length
maxframe/learn/utils/extmath.py CHANGED
@@ -15,6 +15,9 @@
 import numpy as np
 
 from ... import tensor as mt
+from ...core import ENTITY_TYPE
+from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
+from ...tensor.datasource import TensorZeros
 
 
 # Use at least float64 for the accumulating functions to avoid precision issue
@@ -42,7 +45,11 @@ def _safe_accumulator_op(op, x, *args, **kwargs):
     -------
     result : The output of the accumulator function passed to this function
     """
-    if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
+    if (
+        hasattr(x, "dtype")
+        and np.issubdtype(x.dtype, np.floating)
+        and x.dtype.itemsize < 8
+    ):
         result = op(x, *args, **kwargs, dtype=np.float64)
     else:
         result = op(x, *args, **kwargs)
@@ -117,16 +124,31 @@ def _incremental_mean_and_var(
     `utils.sparsefuncs.incr_mean_variance_axis` and
     `utils.sparsefuncs_fast.incr_mean_variance_axis0`
     """
+    has_last_sample = isinstance(last_sample_count, ENTITY_TYPE) and not isinstance(
+        last_sample_count.op, TensorZeros
+    )
+    is_df_type = isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE))
+
     # old = stats until now
     # new = the current increment
     # updated = the aggregated stats
-    last_sum = last_mean * last_sample_count
+    last_sum = last_mean * last_sample_count if has_last_sample else 0
     X_nan_mask = mt.isnan(X)
     # if mt.any(X_nan_mask):
     #     sum_op = mt.nansum
     # else:
     #     sum_op = mt.sum
-    sum_op = mt.nansum
+
+    def df_sum(val, **kw):
+        if "dtype" in kw:
+            val = val.astype(kw.pop("dtype"))
+        return val.sum(**kw)
+
+    if is_df_type:
+        sum_op = df_sum
+    else:
+        sum_op = mt.nansum
+
     if sample_weight is not None:
         # equivalent to np.nansum(X * sample_weight, axis=0)
         # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
@@ -138,10 +160,16 @@ def _incremental_mean_and_var(
         )
     else:
         new_sum = _safe_accumulator_op(sum_op, X, axis=0)
-        n_samples = X.shape[0]
-        new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
+        if is_df_type:
+            new_sample_count = X.count()
+        else:
+            n_samples = X.shape[0]
+            new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
 
-    updated_sample_count = last_sample_count + new_sample_count
+    if not has_last_sample:
+        updated_sample_count = new_sample_count
+    else:
+        updated_sample_count = last_sample_count + new_sample_count
 
     updated_mean = (last_sum + new_sum) / updated_sample_count
 
@@ -170,7 +198,9 @@ def _incremental_mean_and_var(
         # and recommendations", by Chan, Golub, and LeVeque.
         new_unnormalized_variance -= correction**2 / new_sample_count
 
-    last_unnormalized_variance = last_variance * last_sample_count
+    last_unnormalized_variance = (
+        last_variance * last_sample_count if has_last_sample else 0
+    )
 
     with mt.errstate(divide="ignore", invalid="ignore"):
         last_over_new_count = last_sample_count / new_sample_count
@@ -182,8 +212,11 @@ def _incremental_mean_and_var(
             * (last_sum / last_over_new_count - new_sum) ** 2
         )
 
-        zeros = last_sample_count == 0
-        updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
+        if not has_last_sample:
+            updated_unnormalized_variance = new_unnormalized_variance
+        else:
+            zeros = last_sample_count == 0
+            updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
     updated_variance = updated_unnormalized_variance / updated_sample_count
 
     return updated_mean, updated_variance, updated_sample_count
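The `has_last_sample` guard skips the merge arithmetic on the first batch; when a prior batch exists, the function applies the pairwise mean/variance merge of Chan, Golub, and LeVeque cited in the comments. A plain-NumPy illustration of that merge (not the MaxFrame code path):

import numpy as np

x1 = np.array([1.0, 2.0, 3.0])   # "old" batch: stats until now
x2 = np.array([4.0, 5.0])        # "new" batch: the current increment

n1, n2 = len(x1), len(x2)
m1, m2 = x1.mean(), x2.mean()
v1, v2 = x1.var(), x2.var()

n = n1 + n2
mean = (n1 * m1 + n2 * m2) / n
# Chan/Golub/LeVeque merge of the unnormalized variances
var = (n1 * v1 + n2 * v2 + n1 * n2 / n * (m1 - m2) ** 2) / n

assert np.isclose(mean, np.concatenate([x1, x2]).mean())
assert np.isclose(var, np.concatenate([x1, x2]).var())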
maxframe/learn/utils/odpsio.py CHANGED
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, List, NamedTuple, Optional
+from typing import Any, Dict, List, NamedTuple, Optional, Set
+
+from odps import ODPS
 
 from ... import opcodes
 from ...core import ENTITY_TYPE, EntityData, OutputType
@@ -27,6 +29,36 @@ from ...serialization.serializables import (
 from ...utils import find_objects, replace_objects
 from ..core import LearnOperatorMixin
 
+_odps_model_classes: Set["ODPSModelMixin"] = set()
+
+
+def register_odps_model(model_cls: "ODPSModelMixin"):
+    _odps_model_classes.add(model_cls)
+    return model_cls
+
+
+class ReadODPSModel(ObjectOperator, LearnOperatorMixin):
+    _op_type_ = opcodes.READ_ODPS_MODEL
+
+    model_name = StringField("model_name", default=None)
+    model_version = StringField("model_version", default=None)
+    format = StringField("format", default=None)
+    location = StringField("location", default=None)
+    storage_options = DictField("storage_options", default=None)
+
+    def has_custom_code(self) -> bool:
+        return True
+
+    def __call__(self):
+        if not self.format.startswith("BOOSTED_TREE_"):
+            # todo support more model formats
+            raise ValueError("Only support boosted tree format")
+        for model_cls in _odps_model_classes:
+            ret = model_cls._build_odps_source_model(self)
+            if ret is not None:
+                return ret
+        raise ValueError(f"Model {self.model_name} not supported")
+
 
 class ToODPSModel(ObjectOperator, LearnOperatorMixin):
     _op_type_ = opcodes.TO_ODPS_MODEL
@@ -74,17 +106,21 @@ class ToODPSModel(ObjectOperator, LearnOperatorMixin):
         return self.new_tileable(inputs, shape=())
 
 
-class ToODPSModelMixin:
+class ODPSModelMixin:
     class ODPSModelInfo(NamedTuple):
         model_format: str
         model_params: Any
 
+    @classmethod
+    def _build_odps_source_model(cls, op: ReadODPSModel) -> Any:
+        return None
+
     def _get_odps_model_info(self) -> ODPSModelInfo:
         raise NotImplementedError
 
     def to_odps_model(
         self,
-        model_name: str = None,
+        model_name: str,
         model_version: str = None,
         schema: str = None,
         project: str = None,
@@ -167,14 +203,7 @@ class ToODPSModelMixin:
         ...     "role_arn": "acs:ram::<user_id>:role/aliyunodpsdefaultrole"
         ... }).execute()
         """
-        if "." not in model_name:
-            if project and not schema:
-                schema = "default"
-            if schema:
-                model_name = f"{schema}.{model_name}"
-            if project:
-                model_name = f"{project}.{model_name}"
-
+        model_name = _build_odps_model_name(model_name, schema, project)
         model_info = self._get_odps_model_info()
 
         op = ToODPSModel(
@@ -191,3 +220,43 @@ class ToODPSModelMixin:
             storage_options=storage_options,
         )
         return op(getattr(self, "training_info_"), model_info.model_params)
+
+
+def _build_odps_model_name(model_name: str, schema: str, project: str = None):
+    if "." not in model_name:
+        if project and not schema:
+            schema = "default"
+        if schema:
+            model_name = f"{schema}.{model_name}"
+        if project:
+            model_name = f"{project}.{model_name}"
+    return model_name
+
+
+def read_odps_model(
+    model_name: str,
+    schema: str = None,
+    project: str = None,
+    model_version: str = None,
+    odps_entry: ODPS = None,
+):
+    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+    if not hasattr(odps_entry, "get_model"):
+        raise RuntimeError("Need to install pyodps>=0.11.5 to use read_odps_model")
+
+    model_obj = odps_entry.get_model(model_name, project, schema)
+    if model_version:
+        model_obj = model_obj.versions[model_version]
+    # check if model exists
+    model_obj.reload()
+
+    full_model_name = _build_odps_model_name(model_name, schema, project)
+    location = model_obj.path
+    format = model_obj.type.value
+    op = ReadODPSModel(
+        model_name=full_model_name,
+        model_version=model_version,
+        location=location,
+        format=format,
+    )
+    return op()
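A hypothetical round trip with the new reader. Project and model names are placeholders, and whether the returned object exposes predict() depends on which @register_odps_model class claims the model — shown here as an assumption:

from odps import ODPS
from maxframe.learn.utils import read_odps_model

o = ODPS.from_environments()  # get_model() requires pyodps>=0.11.5
model = read_odps_model("my_xgb_model", project="my_project", odps_entry=o)
# for a BOOSTED_TREE_* model this dispatches to a registered class such as
# the XGBRegressor decorated earlier in this diff
pred = model.predict(test_df).execute()  # test_df: placeholder feature frame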
maxframe/lib/filesystem/_oss_lib/common.py CHANGED
@@ -40,6 +40,8 @@ class OSSFileEntry:
         self._storage_options = storage_options
 
     def is_dir(self):
+        if self._path.endswith("/"):
+            self._is_dir = True
         if self._is_dir is None:
             self._is_dir = oss_isdir(self._path)
         return self._is_dir
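The added short-circuit treats any path spelled with a trailing slash as a directory, so the remote oss_isdir() lookup is skipped. A minimal standalone sketch of the same logic (not the real class):

from typing import Callable, Optional

def is_dir(path: str, cached: Optional[bool], remote_isdir: Callable[[str], bool]) -> bool:
    if path.endswith("/"):
        return True  # trailing slash implies a directory: no OSS round trip
    if cached is None:
        cached = remote_isdir(path)  # fall back to the remote check once
    return cached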
maxframe/lib/mmh3.cp38-win_amd64.pyd CHANGED (binary file, no text diff)
maxframe/opcodes.py CHANGED
@@ -271,9 +271,9 @@ SEM = 352
 STR_CONCAT = 353
 MAD = 354
 MEDIAN = 355
-RANK = 356
 IDXMAX = 357
 IDXMIN = 358
+MODE = 359
 
 # tensor operator
 RESHAPE = 401
@@ -398,6 +398,9 @@ REORDER_LEVELS = 747
 DATAFRAME_COMPARE = 748
 DROPLEVEL = 749
 DATAFRAME_UPDATE = 750
+DATAFRAME_COMBINE = 751
+DATAFRAME_INFER_DTYPES = 752
+BETWEEN_TIME = 753
 
 FUSE = 801
 
@@ -409,6 +412,9 @@ MANAGED_MULTI_MODAL_GENERATION = 813
 LLM_TEXT_SUMMARIZE_TASK = 814
 LLM_TEXT_TRANSLATE_TASK = 815
 LLM_TEXT_CLASSIFY_TASK = 816
+LLM_TEXT_EXTRACT_TASK = 817
+LLM_TEXT_EMBEDDING_TASK = 818
+OPENAI_COMPATIBLE_TEXT_GENERATION = 819
 
 # table like input for tensor
 TABLE_COO = 1003
@@ -456,6 +462,7 @@ PSRS_SORT_REGULAR_SAMPLE = 2040
 PSRS_CONCAT_PIVOT = 2041
 PSRS_SHUFFLE = 2042
 PSRS_ALIGN = 2043
+PSRS_RANK_SHUFFLE = 2044
 # partition
 CALC_PARTITIONS_INFO = 2046
 PARTITION_MERGED = 2047
@@ -463,6 +470,7 @@ PARTITION_MERGED = 2047
 # dataframe sort
 SORT_VALUES = 2050
 SORT_INDEX = 2051
+RANK = 2052
 
 # window
 ROLLING_AGG = 2060
maxframe/remote/core.py CHANGED
@@ -27,6 +27,7 @@ from ..serialization.serializables import (
     ListField,
 )
 from ..tensor.core import TENSOR_TYPE
+from ..typing_ import TileableType
 from ..udf import BuiltinFunction
 from ..utils import find_objects, replace_objects
 
@@ -59,6 +60,9 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
     def has_custom_code(self) -> bool:
         return not isinstance(self.function, BuiltinFunction)
 
+    def check_inputs(self, inputs: List[TileableType]):
+        return
+
     @classmethod
     def _set_inputs(cls, op: "RemoteFunction", inputs: List[EntityData]):
         raw_inputs = getattr(op, "_inputs", None)
maxframe/serialization/tests/test_serial.py CHANGED
@@ -239,11 +239,11 @@ def test_pandas():
 @pytest.mark.skipif(_arrow_dtype_supported, reason="pandas doesn't support ArrowDtype")
 def test_fake_arrow_dtype_serde():
     serializer = DtypeSerializer()
-    payload, data, ok = serializer.serial(
+    payload, data, is_leaf = serializer.serial(
         FakeArrowDtype(pa.map_(pa.int64(), pa.string())), dict()
     )
 
-    assert ok
+    assert is_leaf
     assert data == []
     assert payload == ["PA", "map<int64, string>"]
     new_dtype = serializer.deserial(payload, dict(), list())
maxframe/tensor/arithmetic/__init__.py CHANGED
@@ -154,7 +154,7 @@ def _install():
     def inner(lhs, rhs, **kwargs):
         ret = func(lhs, rhs, **kwargs)
         if isinstance(ret, TENSOR_TYPE):
-            ret.op.magic = True
+            ret.op.extra_params["magic"] = True
         return ret
 
     return inner
maxframe/tensor/arithmetic/core.py CHANGED
@@ -415,8 +415,8 @@ class TensorOutBinOp(TensorOperator, TensorElementWiseWithInputs):
         dtype = [r.dtype for r in self._fun(np.empty(1, dtype=x.dtype))]
 
         out = out or (None, None)
-        out1 = out1 or out[0]
-        out2 = out2 or out[1]
+        out1 = out1 if out1 is not None else out[0]
+        out2 = out2 if out2 is not None else out[1]
         x, out1, out2, where = self._process_inputs(x, out1, out2, where)
         shape = x.shape
         order1 = self._calc_order(x, out1)
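`out1 or out[0]` truth-tests the provided output tensor, which (with the `Tensor.__bool__` added below) could trigger execution or raise; the identity check keeps the fallback purely structural. The pattern in isolation:

def pick(preferred, fallback):
    # `preferred or fallback` would call __bool__ on `preferred`;
    # the None check below never evaluates tensor truthiness
    return preferred if preferred is not None else fallback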
maxframe/tensor/arithmetic/tests/test_arithmetic.py CHANGED
@@ -16,7 +16,6 @@ import numpy as np
 import pytest
 import scipy.sparse as sps
 
-from ....core import enter_mode
 from ....utils import collect_leaf_operators
 from ...core import SparseTensor, Tensor
 from ...datasource import array, empty, ones, tensor
@@ -391,14 +390,6 @@ def test_get_set_real():
         a.real = [2, 4]
 
 
-def test_build_mode():
-    t1 = ones((2, 3), chunk_size=2)
-    assert t1 == 2
-
-    with enter_mode(build=True):
-        assert t1 != 2
-
-
 def test_unary_op_func_name():
     # make sure all the unary op has defined the func name.
 
maxframe/tensor/core.py CHANGED
@@ -251,6 +251,9 @@ class Tensor(HasShapeTileable):
     def __len__(self):
         return len(self._data)
 
+    def __bool__(self):
+        return True if is_build_mode() else bool(self.to_numpy())
+
     @property
     def shape(self):
         return self._data.shape
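With `__bool__` defined, truth-testing a tensor materializes it via `to_numpy()` outside of build mode; inside build mode (used during graph construction) it returns True without executing anything, which is also why the old test_build_mode case above became obsolete. A hypothetical sketch, assuming a configured execution session:

import maxframe.tensor as mt

t = mt.tensor([1.0])
if t > 0:  # the comparison builds a size-1 tensor; __bool__ executes it
    print("positive")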
maxframe/tensor/misc/copyto.py CHANGED
@@ -83,7 +83,7 @@ class TensorCopyTo(TensorOperator, TensorOperatorMixin):
                 "could not broadcast input array "
                 f"from shape {src.shape!r} into shape {dst.shape!r}"
             )
-        if where:
+        if where is not None:
             try:
                 broadcast_to(where, dst.shape)
             except ValueError:
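This ties in with the `Tensor.__bool__` addition above: `where` is usually a tensor mask, so `if where:` would try to evaluate the mask (or raise for multi-element masks), while `is not None` only checks that a mask was passed at all. A hedged usage sketch, assuming mt.copyto is exported as in Mars:

import maxframe.tensor as mt

src = mt.ones((2, 2))
dst = mt.empty((2, 2))
mask = mt.tensor([[True, False], [False, True]])
mt.copyto(dst, src, where=mask)  # mask is only shape-checked via broadcast_to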