maxframe-1.0.0rc2-cp310-cp310-win_amd64.whl → maxframe-1.0.0rc3-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of maxframe has been flagged as potentially problematic.
- maxframe/_utils.cp310-win_amd64.pyd +0 -0
- maxframe/codegen.py +3 -2
- maxframe/config/config.py +16 -9
- maxframe/config/validators.py +42 -12
- maxframe/conftest.py +13 -2
- maxframe/core/__init__.py +2 -13
- maxframe/core/entity/__init__.py +0 -4
- maxframe/core/entity/objects.py +45 -2
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/datasource/read_odps_query.py +1 -1
- maxframe/dataframe/datasource/read_odps_table.py +1 -1
- maxframe/dataframe/datastore/to_odps.py +1 -1
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
- maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
- maxframe/{odpsio → io/odpsio}/schema.py +5 -5
- maxframe/{odpsio → io/odpsio}/tableio.py +10 -4
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -3
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +57 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
- maxframe/learn/contrib/xgboost/predict.py +19 -5
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +25 -15
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
- maxframe/protocol.py +1 -15
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
- maxframe/tensor/__init__.py +10 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +98 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +70 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -0
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/utils.py +2 -22
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +15 -61
- maxframe-1.0.0rc3.dist-info/METADATA +104 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +101 -91
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
- maxframe_client/fetcher.py +23 -42
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +54 -18
- maxframe_client/tests/test_fetcher.py +1 -1
- maxframe_client/tests/test_session.py +14 -2
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/volumeio.py +0 -95
- maxframe-1.0.0rc2.dist-info/METADATA +0 -177
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/unique.py +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0

maxframe/{odpsio → io/odpsio}/schema.py CHANGED

@@ -21,9 +21,9 @@ import pyarrow as pa
 from odps import types as odps_types
 from pandas.api import types as pd_types
 
-from …
-from …
-from …
+from ...core import TILEABLE_TYPE, OutputType
+from ...protocol import DataFrameTableMeta
+from ...tensor.core import TENSOR_TYPE
 
 _TEMP_TABLE_PREFIX = "tmp_mf_"

@@ -184,7 +184,7 @@ def pandas_to_odps_schema(
     unknown_as_string: bool = False,
     ignore_index=False,
 ) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
-    from …
+    from ... import dataframe as md
     from .arrow import pandas_to_arrow
 
     if _is_scalar_object(df_obj):

@@ -278,7 +278,7 @@ def build_table_column_name(
 def build_dataframe_table_meta(
     df_obj: Any, ignore_index: bool = False
 ) -> DataFrameTableMeta:
-    from …
+    from ... import dataframe as md
 
     col_to_count = defaultdict(lambda: 0)
     col_to_idx = defaultdict(lambda: 0)

maxframe/{odpsio → io/odpsio}/tableio.py CHANGED

@@ -20,6 +20,7 @@ from typing import Dict, List, Optional, Union
 
 import pyarrow as pa
 from odps import ODPS
+from odps import __version__ as pyodps_version
 from odps.apis.storage_api import (
     StorageApiArrowClient,
     TableBatchScanResponse,

@@ -34,13 +35,15 @@ try:
 except ImportError:
     pac = None
 
-from …
-from …
+from ...config import options
+from ...env import ODPS_STORAGE_API_ENDPOINT
+from ...lib.version import Version
 from .schema import odps_schema_to_arrow_schema
 
 PartitionsType = Union[List[str], str, None]
 
 _DEFAULT_ROW_BATCH_SIZE = 4096
+_need_convert_timezone = Version(pyodps_version) < Version("0.11.7")
 
 
 @contextmanager

@@ -191,7 +194,7 @@ class TunnelMultiPartitionReader:
             arrays = []
             for idx in range(batch.num_columns):
                 col = batch.column(idx)
-                if isinstance(col.type, pa.TimestampType):
+                if _need_convert_timezone and isinstance(col.type, pa.TimestampType):
                     if col.type.tz is not None:
                         target_type = pa.timestamp(
                             self._schema.types[idx].unit, col.type.tz

@@ -354,7 +357,10 @@ class TunnelTableIO(ODPSTableIO):
             # fixme should yield writer directly once pyodps fixes
             # related arrow timestamp bug when provided schema and
             # table schema is identical.
-            …
+            if _need_convert_timezone:
+                yield TunnelWrappedWriter(writer)
+            else:
+                yield writer
 
 
 class HaloTableArrowReader:
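
The timestamp workaround is now applied only when the installed pyodps is older than 0.11.7; newer clients handle arrow timestamp timezones themselves, so the plain tunnel writer is yielded and the reader-side cast is skipped. Below is a minimal sketch of the reader-side cast that the `_need_convert_timezone` flag gates; `_localize_timestamps` and `target_unit` are illustrative names, not part of the package:

    import pyarrow as pa

    def _localize_timestamps(batch: pa.RecordBatch, target_unit: str) -> pa.RecordBatch:
        # Cast tz-aware timestamp columns to the schema's unit, mirroring the
        # conversion the reader above performs when _need_convert_timezone is True.
        arrays = []
        for idx in range(batch.num_columns):
            col = batch.column(idx)
            if isinstance(col.type, pa.TimestampType) and col.type.tz is not None:
                col = col.cast(pa.timestamp(target_unit, col.type.tz))
            arrays.append(col)
        return pa.RecordBatch.from_arrays(arrays, names=batch.schema.names)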

maxframe/io/odpsio/tests/__init__.py ADDED

@@ -0,0 +1,13 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

maxframe/{odpsio → io/odpsio}/tests/test_schema.py CHANGED

@@ -18,9 +18,9 @@ import pyarrow as pa
 import pytest
 from odps import types as odps_types
 
-from …
-from …
-from …
+from .... import dataframe as md
+from .... import tensor as mt
+from ....core import OutputType
 from ..schema import (
     arrow_schema_to_odps_schema,
     build_dataframe_table_meta,

maxframe/{odpsio → io/odpsio}/tests/test_tableio.py CHANGED

@@ -20,9 +20,9 @@ import pyarrow as pa
 import pytest
 from odps import ODPS
 
-from …
-from …
-from …
+from ....config import options
+from ....tests.utils import flaky, tn
+from ....utils import config_odps_default_options
 from ..tableio import ODPSTableIO

maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py CHANGED

@@ -15,7 +15,7 @@
 import pytest
 from odps import ODPS
 
-from …
+from ....tests.utils import tn
 from ..volumeio import ODPSVolumeReader, ODPSVolumeWriter

@@ -69,19 +69,17 @@ def create_volume(request, oss_config):
         oss_config.oss_bucket.batch_delete_objects(keys)
 
 
-@pytest.mark.parametrize("create_volume", ["…
+@pytest.mark.parametrize("create_volume", ["external"], indirect=True)
 def test_read_write_volume(create_volume):
     test_vol_dir = "test_vol_dir"
 
     odps_entry = ODPS.from_environments()
 
     writer = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
-    write_session_id = writer.create_write_session()
 
     writer = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
-    writer.write_file("file1", b"content1"…
-    writer.write_file("file2", b"content2"…
-    writer.commit(["file1", "file2"], write_session_id)
+    writer.write_file("file1", b"content1")
+    writer.write_file("file2", b"content2")
 
     reader = ODPSVolumeReader(odps_entry, create_volume, test_vol_dir)
     assert reader.read_file("file1") == b"content1"

maxframe/io/odpsio/volumeio.py ADDED

@@ -0,0 +1,57 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Iterator, List, Union
+
+from odps import ODPS
+
+
+class ODPSVolumeReader:
+    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
+        self._odps_entry = odps_entry
+        self._volume = odps_entry.get_volume(volume_name)
+        self._volume_dir = volume_dir
+
+    def list_files(self) -> List[str]:
+        def _get_file_name(vol_file):
+            if hasattr(vol_file, "name"):
+                return vol_file.name
+            return vol_file.path.rsplit("/", 1)[-1]
+
+        return [
+            _get_file_name(f)
+            for f in self._odps_entry.list_volume_files(
+                f"/{self._volume.name}/{self._volume_dir}"
+            )
+        ]
+
+    def read_file(self, file_name: str) -> bytes:
+        with self._volume.open_reader(self._volume_dir + "/" + file_name) as reader:
+            return reader.read()
+
+
+class ODPSVolumeWriter:
+    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
+        self._odps_entry = odps_entry
+        self._volume = odps_entry.get_volume(volume_name)
+        self._volume_dir = volume_dir
+
+    def write_file(self, file_name: str, data: Union[bytes, Iterator[bytes]]):
+        with self._volume.open_writer(self._volume_dir + "/" + file_name) as writer:
+            if not inspect.isgenerator(data):
+                writer.write(data)
+            else:
+                for chunk in data:
+                    writer.write(chunk)
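
Compared with the removed maxframe/odpsio/volumeio.py (95 lines), the new implementation drops the explicit session/commit protocol: each file is committed when the underlying open_writer context exits. A usage sketch based on the classes above and the updated test; the volume name is illustrative:

    from odps import ODPS

    from maxframe.io.odpsio.volumeio import ODPSVolumeReader, ODPSVolumeWriter

    odps_entry = ODPS.from_environments()

    writer = ODPSVolumeWriter(odps_entry, "my_external_volume", "test_vol_dir")
    writer.write_file("file1", b"content1")                      # plain bytes
    writer.write_file("file2", (c for c in (b"con", b"tent2")))  # or a generator

    reader = ODPSVolumeReader(odps_entry, "my_external_volume", "test_vol_dir")
    assert reader.read_file("file1") == b"content1"
    assert sorted(reader.list_files()) == ["file1", "file2"]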

maxframe/learn/contrib/xgboost/classifier.py CHANGED

@@ -14,7 +14,7 @@
 
 import numpy as np
 
-from ....tensor import argmax
+from ....tensor import argmax, transpose, vstack
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost

@@ -42,7 +42,10 @@ else:
             sample_weight_eval_set=None,
             base_margin_eval_set=None,
             num_class=None,
+            **kw,
         ):
+            session = kw.pop("session", None)
+            run_kwargs = kw.pop("run_kwargs", dict())
             dtrain, evals = wrap_evaluation_matrices(
                 None,
                 X,

@@ -68,6 +71,8 @@ else:
                 evals=evals,
                 evals_result=self.evals_result_,
                 num_class=num_class,
+                session=session,
+                run_kwargs=run_kwargs,
             )
             self._Booster = result
             return self
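
With these two hunks, fit() accepts session and run_kwargs through **kw and forwards them to train(), which now executes eagerly. A hedged sketch, assuming the estimator is exported as XGBClassifier under maxframe.learn.contrib.xgboost:

    import numpy as np

    from maxframe.learn.contrib.xgboost import XGBClassifier  # assumed export path

    X = np.random.rand(100, 4)
    y = np.random.randint(0, 2, 100)

    clf = XGBClassifier()
    # session/run_kwargs are popped from **kw and passed through to train()
    clf.fit(X, y, session=None, run_kwargs=dict())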

@@ -83,4 +88,23 @@ else:
         def predict_proba(self, data, ntree_limit=None, flag=False, **kw):
             if ntree_limit is not None:
                 raise NotImplementedError("ntree_limit is not currently supported")
-            …
+            prediction = predict(self.get_booster(), data, flag=flag, **kw)
+
+            if len(prediction.shape) == 2 and prediction.shape[1] == self.n_classes_:
+                # multi-class
+                return prediction
+            if (
+                len(prediction.shape) == 2
+                and self.n_classes_ == 2
+                and prediction.shape[1] >= self.n_classes_
+            ):
+                # multi-label
+                return prediction
+            # binary logistic function
+            classone_probs = prediction
+            classzero_probs = 1.0 - classone_probs
+            return transpose(vstack((classzero_probs, classone_probs)))
+
+        @property
+        def classes_(self) -> np.ndarray:
+            return np.arange(self.n_classes_)
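
In the binary case the booster returns a 1-D array of class-one probabilities; stacking it with its complement and transposing produces the (n_samples, 2) layout scikit-learn expects. The same arithmetic in plain numpy:

    import numpy as np

    classone_probs = np.array([0.9, 0.2, 0.6])   # P(class 1) from the booster
    classzero_probs = 1.0 - classone_probs       # complement gives P(class 0)
    proba = np.transpose(np.vstack((classzero_probs, classone_probs)))
    print(proba.shape)  # (3, 2); row i is [P(class 0), P(class 1)] for sample i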

maxframe/learn/contrib/xgboost/core.py CHANGED

@@ -12,15 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 try:
     import xgboost
 except ImportError:
     xgboost = None
 
+from ...core import Model, ModelData
 from .dmatrix import DMatrix
 
+
+class BoosterData(ModelData):
+    __slots__ = ("_evals_result",)
+
+    _evals_result: Dict
+
+    def __init__(self, *args, evals_result=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._evals_result = evals_result if evals_result is not None else dict()
+
+    def execute(self, session=None, **kw):
+        # The evals_result should be fetched when BoosterData.execute() is called.
+        result = super().execute(session=session, **kw)
+        if self.op.has_evals_result and self.key == self.op.outputs[0].key:
+            self._evals_result.update(self.op.outputs[1].fetch(session=session))
+        return result
+
+    def predict(
+        self,
+        data,
+        output_margin=False,
+        pred_leaf=False,
+        pred_contribs=False,
+        approx_contribs=False,
+        pred_interactions=False,
+        validate_features=True,
+        training=False,
+        iteration_range=None,
+        strict_shape=False,
+    ):
+        from .predict import predict
+
+        return predict(
+            self,
+            data,
+            output_margin=output_margin,
+            pred_leaf=pred_leaf,
+            pred_contribs=pred_contribs,
+            approx_contribs=approx_contribs,
+            pred_interactions=pred_interactions,
+            validate_features=validate_features,
+            training=training,
+            iteration_range=iteration_range,
+            strict_shape=strict_shape,
+        )
+
+
+class Booster(Model):
+    pass
+
+
 if not xgboost:
     XGBScikitLearnBase = None
 else:

@@ -40,7 +92,9 @@ else:
             **kw,
         ):
             """
-            Fit the regressor.
+            Fit the regressor. Note that fit() is an eager-execution
+            API; the call blocks until training finishes.
+
             Parameters
             ----------
             X : array_like

@@ -72,6 +126,37 @@ else:
             """
             raise NotImplementedError
 
+        def evals_result(self, **kw) -> Dict:
+            """Return the evaluation results.
+
+            If **eval_set** is passed to the :py:meth:`fit` function, you can call
+            ``evals_result()`` to get evaluation results for all passed **eval_sets**.
+            When **eval_metric** is also passed to the :py:meth:`fit` function, the
+            **evals_result** will contain the **eval_metrics** passed to the
+            :py:meth:`fit` function.
+
+            The returned evaluation result is a dictionary:
+
+            .. code-block:: python
+
+                {'validation_0': {'logloss': ['0.604835', '0.531479']},
+                 'validation_1': {'logloss': ['0.41965', '0.17686']}}
+
+            Note that evals_result() blocks until training has finished.
+
+            Returns
+            -------
+            evals_result
+            """
+            result = super().evals_result()
+            if not self._Booster.op.has_evals_result or len(result) != 0:
+                return result
+            session = kw.pop("session", None)
+            run_kwargs = kw.pop("run_kwargs", dict())
+            self._Booster.execute(session=session, **run_kwargs)
+            return super().evals_result()
+
     def wrap_evaluation_matrices(
         missing: float,
         X: Any,
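
A hedged round-trip sketch of the blocking evals_result() above, again assuming the XGBClassifier export path and that fit() accepts an eval_set keyword as in the standard xgboost sklearn API:

    import numpy as np

    from maxframe.learn.contrib.xgboost import XGBClassifier  # assumed export path

    X, y = np.random.rand(80, 4), np.random.randint(0, 2, 80)
    X_val, y_val = np.random.rand(20, 4), np.random.randint(0, 2, 20)

    clf = XGBClassifier(eval_metric="logloss")
    clf.fit(X, y, eval_set=[(X_val, y_val)])
    history = clf.evals_result()          # blocks until training has finished
    print(history["validation_0"]["logloss"])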

maxframe/learn/contrib/xgboost/dmatrix.py CHANGED

@@ -99,10 +99,7 @@ def check_array_like(y: TileableType, name: str) -> TileableType:
     y = convert_to_tensor_or_dataframe(y)
     if isinstance(y, DATAFRAME_TYPE):
         y = y.iloc[:, 0]
-
-    if y.ndim != 1:
-        raise ValueError(f"Expecting 1-d {name}, got: {y.ndim}-d")
-    return y
+    return astensor(y)
 
 
 def to_dmatrix(

maxframe/learn/contrib/xgboost/predict.py CHANGED

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pickle
 
 import numpy as np
 import pandas as pd

@@ -22,8 +21,14 @@ from ....core.entity.output_types import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
 from ....dataframe.utils import parse_index
-from ....serialization.serializables import …
+from ....serialization.serializables import (
+    BoolField,
+    KeyField,
+    ReferenceField,
+    TupleField,
+)
 from ....tensor.core import TENSOR_TYPE, TensorOrder
+from .core import BoosterData
 from .dmatrix import check_data

@@ -32,9 +37,7 @@ class XGBPredict(Operator, TileableOperatorMixin):
     output_dtype = np.dtype(np.float32)
 
     data = KeyField("data", default=None)
-    model = …(
-        "model", on_serialize=pickle.dumps, on_deserialize=pickle.loads, default=None
-    )
+    model = ReferenceField("model", reference_type=BoosterData, default=None)
     pred_leaf = BoolField("pred_leaf", default=False)
     pred_contribs = BoolField("pred_contribs", default=False)
     approx_contribs = BoolField("approx_contribs", default=False)

@@ -107,6 +110,17 @@ def predict(
     strict_shape=False,
     flag=False,
 ):
+    """
+    Predict data with a MaxFrame XGBoost model.
+
+    Parameters
+    ----------
+    Parameters are the same as `xgboost.Booster.predict`. Note that predict()
+    is a lazy-execution API: the result is computed only when executed.
+
+    Returns
+    -------
+    results: Tensor
+    """
     data = check_data(data)
     # TODO: check model datatype
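
Unlike train(), predict() stays lazy: it only builds an XGBPredict tileable, and nothing runs until execute() is called. A hedged sketch; the export paths and the to_dmatrix(label=...) signature are assumptions:

    import numpy as np

    from maxframe.learn.contrib.xgboost import predict, train  # assumed export path
    from maxframe.learn.contrib.xgboost.dmatrix import to_dmatrix

    dtrain = to_dmatrix(np.random.rand(100, 4), label=np.random.randint(0, 2, 100))
    dtest = np.random.rand(10, 4)

    booster = train({"objective": "binary:logistic"}, dtrain)  # eager: blocks
    pred = predict(booster, dtest)                             # lazy: builds the graph
    print(pred.execute().fetch())                              # triggers execution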

maxframe/learn/contrib/xgboost/regressor.py CHANGED

@@ -41,11 +41,6 @@ else:
         ):
             session = kw.pop("session", None)
             run_kwargs = kw.pop("run_kwargs", dict())
-            if kw:
-                raise TypeError(
-                    f"fit got an unexpected keyword argument '{next(iter(kw))}'"
-                )
-
             dtrain, evals = wrap_evaluation_matrices(
                 None,
                 X,

@@ -57,6 +52,8 @@ else:
                 base_margin_eval_set,
             )
             params = self.get_xgb_params()
+            if not params.get("objective"):
+                params["objective"] = "reg:squarederror"
             self.evals_result_ = dict()
             result = train(
                 params,

@@ -71,8 +68,4 @@ else:
             return self
 
         def predict(self, data, **kw):
-            …
-            run_kwargs = kw.pop("run_kwargs", None)
-            return predict(
-                self.get_booster(), data, session=session, run_kwargs=run_kwargs, **kw
-            )
+            return predict(self.get_booster(), data, **kw)

maxframe/learn/contrib/xgboost/train.py CHANGED

@@ -29,6 +29,7 @@ from ....serialization.serializables import (
     KeyField,
     ListField,
 )
+from .core import Booster
 from .dmatrix import ToDMatrix, to_dmatrix
 
 logger = logging.getLogger(__name__)

@@ -59,49 +60,59 @@ class XGBTrain(Operator, TileableOperatorMixin):
     num_boost_round = Int64Field("num_boost_round", default=10)
     num_class = Int64Field("num_class", default=None)
 
-    # Store evals_result in local to store the remote evals_result
-    evals_result: dict = None
-
     def __init__(self, gpu=None, **kw):
         super().__init__(gpu=gpu, **kw)
         if self.output_types is None:
             self.output_types = [OutputType.object]
+            if self.has_evals_result:
+                self.output_types.append(OutputType.object)
 
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
         self.dtrain = self._inputs[0]
         rest = self._inputs[1:]
-        if self.…
+        if self.has_evals_result:
             evals_dict = OrderedDict(self.evals)
             new_evals_dict = OrderedDict()
             for new_key, val in zip(rest, evals_dict.values()):
                 new_evals_dict[new_key] = val
             self.evals = list(new_evals_dict.items())
 
-    def __call__(self):
+    def __call__(self, evals_result):
         inputs = [self.dtrain]
-        if self.…
+        if self.has_evals_result:
             inputs.extend(e[0] for e in self.evals)
-        return self.…
+        return self.new_tileables(
+            inputs, object_class=Booster, evals_result=evals_result
+        )[0]
+
+    @property
+    def output_limit(self):
+        return 2 if self.has_evals_result else 1
+
+    @property
+    def has_evals_result(self) -> bool:
+        return self.evals
 
 
 def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
     """
-    Train XGBoost model in …
+    Train an XGBoost model in the MaxFrame manner.
 
     Parameters
     ----------
-    Parameters are the same as `xgboost.train`.
+    Parameters are the same as `xgboost.train`. Note that train() is an
+    eager-execution API; the call blocks until training finishes.
 
     Returns
     -------
     results: Booster
     """
-    evals_result = evals_result …
-    evals = None or ()
-
+    evals_result = evals_result if evals_result is not None else dict()
     processed_evals = []
+    session = kwargs.pop("session", None)
+    run_kwargs = kwargs.pop("run_kwargs", dict())
    if evals:
        for eval_dmatrix, name in evals:
            if not isinstance(name, str):

@@ -110,12 +121,11 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
             processed_evals.append((eval_dmatrix, name))
         else:
             processed_evals.append((to_dmatrix(eval_dmatrix), name))
-
     return XGBTrain(
         params=params,
         dtrain=dtrain,
         evals=processed_evals,
         evals_result=evals_result,
         num_class=num_class,
-        **kwargs
-    )()
+        **kwargs,
+    )(evals_result).execute(session=session, **run_kwargs)
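
With this change train() runs eagerly and fills the caller's evals_result dict in place through the operator's second output, which BoosterData.execute() fetches. A hedged sketch; the export path and the to_dmatrix(label=...) signature are assumptions:

    import numpy as np

    from maxframe.learn.contrib.xgboost import train  # assumed export path
    from maxframe.learn.contrib.xgboost.dmatrix import to_dmatrix

    dtrain = to_dmatrix(np.random.rand(100, 4), label=np.random.randint(0, 2, 100))
    dval = to_dmatrix(np.random.rand(20, 4), label=np.random.randint(0, 2, 20))

    evals_result = dict()
    booster = train(
        {"objective": "binary:logistic"},
        dtrain,
        evals=[(dval, "validation_0")],
        evals_result=evals_result,  # populated in place once training finishes
    )
    print(evals_result["validation_0"])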

maxframe/{core/operator/fuse.py → learn/core.py} CHANGED

@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from …
-from ...serialization.serializables import ReferenceField
-from ..graph import ChunkGraph
-from .base import Operator
+from ..core.entity.objects import Object, ObjectData
 
 
-class …
-
-    _op_type_ = opcodes.FUSE
+class ModelData(ObjectData):
+    pass
 
-    fuse_graph = ReferenceField("fuse_graph", ChunkGraph)
+
+class Model(Object):
+    pass
 
-
-
+
+MODEL_TYPE = (Model, ModelData)

maxframe/lib/mmh3.cp310-win_amd64.pyd CHANGED (binary file)

maxframe/protocol.py CHANGED

@@ -15,7 +15,7 @@
 import base64
 import enum
 import uuid
-from typing import Any, Dict, Generic, List, Optional, …
+from typing import Any, Dict, Generic, List, Optional, Type, TypeVar
 
 import pandas as pd

@@ -38,7 +38,6 @@ from .serialization.serializables import (
     Serializable,
     SeriesField,
     StringField,
-    TupleField,
 )
 
 pickling_support.install()

@@ -92,19 +91,6 @@ class DataSerializeType(enum.Enum):
     PICKLE = 0
 
 
-class VolumeDataMeta(Serializable):
-    output_type: OutputType = EnumField(
-        "output_type", OutputType, FieldTypes.int8, default=None
-    )
-    serial_type: DataSerializeType = EnumField(
-        "serial_type", DataSerializeType, FieldTypes.int8, default=None
-    )
-    shape: Tuple[int, ...] = TupleField("shape", FieldTypes.int64, default=None)
-    nsplits: Tuple[Tuple[int, ...], ...] = TupleField(
-        "nsplits", FieldTypes.tuple(FieldTypes.tuple(FieldTypes.int64)), default=None
-    )
-
-
 _result_type_to_info_cls: Dict[ResultType, Type["ResultInfo"]] = dict()

maxframe/remote/core.py CHANGED

@@ -15,7 +15,7 @@
 from functools import partial
 
 from .. import opcodes
-from ..core import ENTITY_TYPE, …
+from ..core import ENTITY_TYPE
 from ..core.operator import ObjectOperator, ObjectOperatorMixin
 from ..dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
 from ..serialization.serializables import (

@@ -26,7 +26,7 @@ from ..serialization.serializables import (
     ListField,
 )
 from ..tensor.core import TENSOR_TYPE
-from ..utils import …
+from ..utils import find_objects, replace_objects

@@ -63,12 +63,8 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
         if raw_inputs is not None:
             for raw_inp in raw_inputs:
                 if self._no_prepare(raw_inp):
-                    …
-                        …
-                        mapping[raw_inp] = next(function_inputs)
-                    else:
-                        # in tile, set_inputs from chunk
-                        mapping[raw_inp] = build_fetch_tileable(raw_inp)
+                    # not in tile, set_inputs from tileable
+                    mapping[raw_inp] = next(function_inputs)
                 else:
                     mapping[raw_inp] = next(function_inputs)
         self.function_args = replace_objects(self.function_args, mapping)