maxframe-1.0.0rc1-cp38-cp38-macosx_10_9_universal2.whl → maxframe-1.0.0rc3-cp38-cp38-macosx_10_9_universal2.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Potentially problematic release.

Files changed (138)
  1. maxframe/_utils.cpython-38-darwin.so +0 -0
  2. maxframe/codegen.py +3 -6
  3. maxframe/config/config.py +49 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +15 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +46 -3
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cpython-38-darwin.so +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/__init__.py +1 -1
  24. maxframe/dataframe/arithmetic/around.py +5 -17
  25. maxframe/dataframe/arithmetic/core.py +15 -7
  26. maxframe/dataframe/arithmetic/docstring.py +5 -55
  27. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  28. maxframe/dataframe/core.py +5 -5
  29. maxframe/dataframe/datasource/date_range.py +2 -2
  30. maxframe/dataframe/datasource/read_odps_query.py +7 -1
  31. maxframe/dataframe/datasource/read_odps_table.py +3 -2
  32. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  33. maxframe/dataframe/datastore/to_odps.py +1 -1
  34. maxframe/dataframe/groupby/cum.py +0 -1
  35. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/rename.py +3 -37
  38. maxframe/dataframe/indexing/sample.py +0 -1
  39. maxframe/dataframe/indexing/set_index.py +68 -1
  40. maxframe/dataframe/merge/merge.py +236 -2
  41. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  42. maxframe/dataframe/misc/apply.py +3 -10
  43. maxframe/dataframe/misc/case_when.py +1 -1
  44. maxframe/dataframe/misc/describe.py +2 -2
  45. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  46. maxframe/dataframe/misc/eval.py +4 -0
  47. maxframe/dataframe/misc/pct_change.py +1 -83
  48. maxframe/dataframe/misc/transform.py +1 -30
  49. maxframe/dataframe/misc/value_counts.py +4 -17
  50. maxframe/dataframe/missing/dropna.py +1 -1
  51. maxframe/dataframe/missing/fillna.py +5 -5
  52. maxframe/dataframe/operators.py +1 -17
  53. maxframe/dataframe/reduction/core.py +2 -2
  54. maxframe/dataframe/sort/sort_values.py +1 -11
  55. maxframe/dataframe/statistics/quantile.py +5 -17
  56. maxframe/dataframe/utils.py +4 -7
  57. maxframe/io/objects/__init__.py +24 -0
  58. maxframe/io/objects/core.py +140 -0
  59. maxframe/io/objects/tensor.py +76 -0
  60. maxframe/io/objects/tests/__init__.py +13 -0
  61. maxframe/io/objects/tests/test_object_io.py +97 -0
  62. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  63. maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
  64. maxframe/{odpsio → io/odpsio}/schema.py +15 -12
  65. maxframe/io/odpsio/tableio.py +702 -0
  66. maxframe/io/odpsio/tests/__init__.py +13 -0
  67. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
  68. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  69. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  70. maxframe/io/odpsio/volumeio.py +57 -0
  71. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  72. maxframe/learn/contrib/xgboost/core.py +87 -2
  73. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  74. maxframe/learn/contrib/xgboost/predict.py +21 -7
  75. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  76. maxframe/learn/contrib/xgboost/train.py +27 -17
  77. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  78. maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
  79. maxframe/protocol.py +41 -17
  80. maxframe/remote/core.py +4 -8
  81. maxframe/serialization/__init__.py +1 -0
  82. maxframe/serialization/core.cpython-38-darwin.so +0 -0
  83. maxframe/serialization/serializables/core.py +48 -9
  84. maxframe/tensor/__init__.py +69 -2
  85. maxframe/tensor/arithmetic/isclose.py +1 -0
  86. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  87. maxframe/tensor/core.py +5 -136
  88. maxframe/tensor/datasource/array.py +3 -0
  89. maxframe/tensor/datasource/full.py +1 -1
  90. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  91. maxframe/tensor/indexing/flatnonzero.py +1 -1
  92. maxframe/tensor/merge/__init__.py +2 -0
  93. maxframe/tensor/merge/concatenate.py +98 -0
  94. maxframe/tensor/merge/tests/test_merge.py +30 -1
  95. maxframe/tensor/merge/vstack.py +70 -0
  96. maxframe/tensor/{base → misc}/__init__.py +2 -0
  97. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  98. maxframe/tensor/misc/atleast_2d.py +70 -0
  99. maxframe/tensor/misc/atleast_3d.py +85 -0
  100. maxframe/tensor/misc/tests/__init__.py +13 -0
  101. maxframe/tensor/{base → misc}/transpose.py +22 -18
  102. maxframe/tensor/{base → misc}/unique.py +2 -2
  103. maxframe/tensor/operators.py +1 -7
  104. maxframe/tensor/random/core.py +1 -1
  105. maxframe/tensor/reduction/count_nonzero.py +1 -0
  106. maxframe/tensor/reduction/mean.py +1 -0
  107. maxframe/tensor/reduction/nanmean.py +1 -0
  108. maxframe/tensor/reduction/nanvar.py +2 -0
  109. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  110. maxframe/tensor/reduction/var.py +2 -0
  111. maxframe/tensor/statistics/quantile.py +2 -2
  112. maxframe/tensor/utils.py +2 -22
  113. maxframe/tests/utils.py +11 -2
  114. maxframe/typing_.py +4 -1
  115. maxframe/udf.py +8 -9
  116. maxframe/utils.py +32 -70
  117. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +25 -25
  118. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
  119. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
  120. maxframe_client/fetcher.py +60 -68
  121. maxframe_client/session/graph.py +8 -2
  122. maxframe_client/session/odps.py +58 -22
  123. maxframe_client/tests/test_fetcher.py +21 -3
  124. maxframe_client/tests/test_session.py +27 -4
  125. maxframe/core/entity/chunks.py +0 -68
  126. maxframe/core/entity/fuse.py +0 -73
  127. maxframe/core/graph/builder/chunk.py +0 -430
  128. maxframe/odpsio/tableio.py +0 -322
  129. maxframe/odpsio/volumeio.py +0 -95
  130. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  131. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  132. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  133. /maxframe/tensor/{base → misc}/astype.py +0 -0
  134. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  135. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  136. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  137. /maxframe/tensor/{base → misc}/where.py +0 -0
  138. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
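
Most of the renames above move `maxframe/odpsio` under a new `maxframe/io` package, and `maxframe/tensor/base` becomes `maxframe/tensor/misc`. For code importing these modules by full path, the import moves with the file; a hedged sketch (the `pandas_to_odps_schema` helper is taken from the test diff below, and public re-exports are not confirmed by this diff):

    # against 1.0.0rc1
    from maxframe.odpsio.schema import pandas_to_odps_schema

    # against 1.0.0rc3
    from maxframe.io.odpsio.schema import pandas_to_odps_schema

The same move explains the mechanical `...` → `....` relative-import changes throughout the test diffs that follow.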
maxframe/{odpsio → io/odpsio}/tests/test_schema.py

@@ -18,9 +18,9 @@ import pyarrow as pa
 import pytest
 from odps import types as odps_types
 
-from ... import dataframe as md
-from ... import tensor as mt
-from ...core import OutputType
+from .... import dataframe as md
+from .... import tensor as mt
+from ....core import OutputType
 from ..schema import (
     arrow_schema_to_odps_schema,
     build_dataframe_table_meta,
@@ -143,17 +143,17 @@ def test_pandas_to_odps_schema_index(wrap_obj):
     data = pd.Index(np.random.randint(0, 100, 100))
 
     test_idx = _wrap_maxframe_obj(data, wrap=wrap_obj)
-    schema, meta = pandas_to_odps_schema(test_idx, unknown_as_string=True)
-    assert [c.name for c in schema.columns] == ["_idx_0"]
-    assert [c.type.name for c in schema.columns] == ["bigint"]
-    assert meta.type == OutputType.index
-    assert meta.table_column_names == []
-    assert meta.table_index_column_names == ["_idx_0"]
-    assert meta.pd_column_level_names == []
-    assert meta.pd_index_level_names == [None]
-
-    with pytest.raises(AssertionError):
-        pandas_to_odps_schema(test_idx, unknown_as_string=True, ignore_index=True)
+    for ignore_idx in (False, True):
+        schema, meta = pandas_to_odps_schema(
+            test_idx, unknown_as_string=True, ignore_index=ignore_idx
+        )
+        assert [c.name for c in schema.columns] == ["_idx_0"]
+        assert [c.type.name for c in schema.columns] == ["bigint"]
+        assert meta.type == OutputType.index
+        assert meta.table_column_names == []
+        assert meta.table_index_column_names == ["_idx_0"]
+        assert meta.pd_column_level_names == []
+        assert meta.pd_index_level_names == [None]
 
     data = pd.MultiIndex.from_arrays(
         [np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)],
@@ -177,6 +177,7 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
     test_scalar = _wrap_maxframe_obj(data, wrap=wrap_obj)
     if wrap_obj != "no":
         test_scalar.op.data = None
+
     schema, meta = pandas_to_odps_schema(test_scalar, unknown_as_string=True)
     assert schema.columns[0].name == "_idx_0"
     assert schema.columns[0].type.name == "double"
@@ -186,9 +187,6 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
     assert meta.pd_column_level_names == []
     assert meta.pd_index_level_names == [None]
 
-    with pytest.raises(AssertionError):
-        pandas_to_odps_schema(test_scalar, unknown_as_string=True, ignore_index=True)
-
 
 def test_odps_arrow_schema_conversion():
     odps_schema = odps_types.OdpsSchema(
@@ -211,10 +209,11 @@ def test_odps_arrow_schema_conversion():
            odps_types.Column("col16", "struct<a1: string, a2: map<string, bigint>>"),
            odps_types.Column("col17", "CHAR(15)"),
            odps_types.Column("col18", "VARCHAR(15)"),
+           odps_types.Column("col19", "decimal"),
        ]
    )
    arrow_schema = odps_schema_to_arrow_schema(odps_schema)
-   assert arrow_schema.names == [f"col{i}" for i in range(1, 19)]
+   assert arrow_schema.names == [f"col{i}" for i in range(1, 20)]
    assert arrow_schema.types == [
        pa.string(),
        pa.binary(),
@@ -234,6 +233,7 @@ def test_odps_arrow_schema_conversion():
        pa.struct([("a1", pa.string()), ("a2", pa.map_(pa.string(), pa.int64()))]),
        pa.string(),
        pa.string(),
+       pa.decimal128(38, 18),
    ]
 
    expected_odps_schema = odps_types.OdpsSchema(
@@ -256,6 +256,7 @@ def test_odps_arrow_schema_conversion():
            odps_types.Column("col16", "struct<a1: string, a2: map<string, bigint>>"),
            odps_types.Column("col17", "string"),
            odps_types.Column("col18", "string"),
+           odps_types.Column("col19", "decimal(38, 18)"),
        ]
    )
 
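The new `col19` assertions pin down how a plain ODPS `decimal` column (no explicit precision) round-trips: it becomes Arrow `decimal128(38, 18)` and converts back as the explicit ODPS type `decimal(38, 18)`. The Arrow side of the mapping can be checked standalone, without an ODPS connection:

    import decimal

    import pyarrow as pa

    # ODPS "decimal" → Arrow decimal128 with ODPS's default precision and scale
    arrow_type = pa.decimal128(38, 18)
    arr = pa.array([decimal.Decimal("1.5")], type=arrow_type)
    assert arr.type.precision == 38 and arr.type.scale == 18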
maxframe/{odpsio → io/odpsio}/tests/test_tableio.py

@@ -12,22 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+
 import numpy as np
 import pandas as pd
 import pyarrow as pa
+import pytest
 from odps import ODPS
 
-from ...tests.utils import flaky, tn
-from ...utils import config_odps_default_options
-from ..tableio import HaloTableIO
+from ....config import options
+from ....tests.utils import flaky, tn
+from ....utils import config_odps_default_options
+from ..tableio import ODPSTableIO
+
+
+@pytest.fixture
+def switch_table_io(request):
+    old_use_common_table = options.use_common_table
+    try:
+        options.use_common_table = request.param
+        yield
+    finally:
+        options.use_common_table = old_use_common_table
 
 
 @flaky(max_runs=3)
-def test_empty_table_io():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_empty_table_io(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read from empty table
     empty_table_name = tn("test_empty_table_halo_read")
@@ -35,42 +50,53 @@ def test_empty_table_io():
     tb = o.create_table(empty_table_name, "col1 string", lifecycle=1)
 
     try:
-        with halo_table_io.open_reader(empty_table_name) as reader:
+        with table_io.open_reader(empty_table_name) as reader:
             assert len(reader.read_all()) == 0
     finally:
         tb.drop()
 
 
 @flaky(max_runs=3)
-def test_table_io_without_parts():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_without_parts(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables without partition
     no_part_table_name = tn("test_no_part_halo_write")
     o.delete_table(no_part_table_name, if_exists=True)
-    tb = o.create_table(
-        no_part_table_name, ",".join(f"{c} double" for c in "abcde"), lifecycle=1
-    )
+    col_desc = ",".join(f"{c} double" for c in "abcde") + ", f datetime"
+    tb = o.create_table(no_part_table_name, col_desc, lifecycle=1)
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(no_part_table_name) as writer:
+        date_val = [
+            (
+                datetime.datetime.now().replace(microsecond=0)
+                + datetime.timedelta(seconds=i)
+            )
+            for i in range(100)
+        ]
+        pd_data["f"] = pd.Series(date_val, dtype="datetime64[ms]").dt.tz_localize(
+            options.local_timezone
+        )
+        with table_io.open_writer(no_part_table_name) as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
-        with halo_table_io.open_reader(no_part_table_name) as reader:
+        with table_io.open_reader(no_part_table_name) as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
     finally:
         tb.drop()
 
 
 @flaky(max_runs=3)
-def test_table_io_with_range_reader():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_with_range_reader(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables without partition
     no_part_table_name = tn("test_no_part_halo_write")
@@ -81,15 +107,15 @@ def test_table_io_with_range_reader():
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(no_part_table_name) as writer:
+        with table_io.open_writer(no_part_table_name) as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
 
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             no_part_table_name, start=None, stop=100, row_batch_size=10
         ) as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
 
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             no_part_table_name,
             start=-2,
             stop=-52,
@@ -105,11 +131,12 @@ def test_table_io_with_range_reader():
 
 
 @flaky(max_runs=3)
-def test_table_io_with_parts():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_with_parts(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables with partition
     parted_table_name = tn("test_parted_halo_write")
@@ -122,11 +149,11 @@ def test_table_io_with_parts():
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(parted_table_name, "pt=test") as writer:
+        with table_io.open_writer(parted_table_name, "pt=test") as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
-        with halo_table_io.open_reader(parted_table_name, "pt=test") as reader:
+        with table_io.open_reader(parted_table_name, "pt=test") as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             parted_table_name, "pt=test", partition_columns=True
         ) as reader:
             expected_data = pd_data.copy()
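
Every test in this module now runs twice via the `switch_table_io` fixture, once per value of `options.use_common_table`, so both table I/O backends are covered by the same test bodies. The mechanism is ordinary pytest indirect parametrization; a self-contained sketch with a stand-in for `maxframe.config.options`:

    import pytest

    class _Options:  # stand-in for maxframe.config.options
        use_common_table = False

    options = _Options()

    @pytest.fixture
    def switch_table_io(request):
        old_value = options.use_common_table
        try:
            options.use_common_table = request.param  # injected by parametrize
            yield
        finally:
            options.use_common_table = old_value  # restored even on failure

    @pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
    def test_runs_once_per_backend(switch_table_io):
        assert isinstance(options.use_common_table, bool)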
maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py

@@ -15,7 +15,7 @@
 import pytest
 from odps import ODPS
 
-from ...tests.utils import tn
+from ....tests.utils import tn
 from ..volumeio import ODPSVolumeReader, ODPSVolumeWriter
 
 
@@ -69,19 +69,17 @@ def create_volume(request, oss_config):
        oss_config.oss_bucket.batch_delete_objects(keys)
 
 
-@pytest.mark.parametrize("create_volume", ["parted", "external"], indirect=True)
+@pytest.mark.parametrize("create_volume", ["external"], indirect=True)
 def test_read_write_volume(create_volume):
     test_vol_dir = "test_vol_dir"
 
     odps_entry = ODPS.from_environments()
 
     writer = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
-    write_session_id = writer.create_write_session()
 
     writer = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
-    writer.write_file("file1", b"content1", write_session_id)
-    writer.write_file("file2", b"content2", write_session_id)
-    writer.commit(["file1", "file2"], write_session_id)
+    writer.write_file("file1", b"content1")
+    writer.write_file("file2", b"content2")
 
     reader = ODPSVolumeReader(odps_entry, create_volume, test_vol_dir)
     assert reader.read_file("file1") == b"content1"
maxframe/io/odpsio/volumeio.py (new file)

@@ -0,0 +1,57 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Iterator, List, Union
+
+from odps import ODPS
+
+
+class ODPSVolumeReader:
+    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
+        self._odps_entry = odps_entry
+        self._volume = odps_entry.get_volume(volume_name)
+        self._volume_dir = volume_dir
+
+    def list_files(self) -> List[str]:
+        def _get_file_name(vol_file):
+            if hasattr(vol_file, "name"):
+                return vol_file.name
+            return vol_file.path.rsplit("/", 1)[-1]
+
+        return [
+            _get_file_name(f)
+            for f in self._odps_entry.list_volume_files(
+                f"/{self._volume.name}/{self._volume_dir}"
+            )
+        ]
+
+    def read_file(self, file_name: str) -> bytes:
+        with self._volume.open_reader(self._volume_dir + "/" + file_name) as reader:
+            return reader.read()
+
+
+class ODPSVolumeWriter:
+    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
+        self._odps_entry = odps_entry
+        self._volume = odps_entry.get_volume(volume_name)
+        self._volume_dir = volume_dir
+
+    def write_file(self, file_name: str, data: Union[bytes, Iterator[bytes]]):
+        with self._volume.open_writer(self._volume_dir + "/" + file_name) as writer:
+            if not inspect.isgenerator(data):
+                writer.write(data)
+            else:
+                for chunk in data:
+                    writer.write(chunk)
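
A hedged usage sketch of the new writer/reader pair; the volume and directory names are placeholders and, as in the tests above, ODPS credentials come from the environment. Note that `write_file` dispatches on `inspect.isgenerator`, so chunked input must be a real generator, not just any iterable:

    from odps import ODPS

    from maxframe.io.odpsio.volumeio import ODPSVolumeReader, ODPSVolumeWriter

    o = ODPS.from_environments()

    writer = ODPSVolumeWriter(o, "my_volume", "my_dir")  # placeholder names
    writer.write_file("file1", b"content1")
    writer.write_file("file2", (chunk for chunk in (b"chu", b"nk2")))  # generator

    reader = ODPSVolumeReader(o, "my_volume", "my_dir")
    assert reader.read_file("file1") == b"content1"
    print(reader.list_files())

Compared with the 1.0.0rc1 implementation, the write-session round trip (`create_write_session()`, `commit()`) is gone, which is exactly what the test simplification above reflects.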
maxframe/learn/contrib/xgboost/classifier.py

@@ -14,7 +14,7 @@
 
 import numpy as np
 
-from ....tensor import argmax
+from ....tensor import argmax, transpose, vstack
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
 
@@ -42,7 +42,10 @@ else:
            sample_weight_eval_set=None,
            base_margin_eval_set=None,
            num_class=None,
+           **kw,
        ):
+           session = kw.pop("session", None)
+           run_kwargs = kw.pop("run_kwargs", dict())
            dtrain, evals = wrap_evaluation_matrices(
                None,
                X,
@@ -68,6 +71,8 @@ else:
                evals=evals,
                evals_result=self.evals_result_,
                num_class=num_class,
+               session=session,
+               run_kwargs=run_kwargs,
            )
            self._Booster = result
            return self
@@ -83,4 +88,23 @@ else:
        def predict_proba(self, data, ntree_limit=None, flag=False, **kw):
            if ntree_limit is not None:
                raise NotImplementedError("ntree_limit is not currently supported")
-           return predict(self.get_booster(), data, flag=flag, **kw)
+           prediction = predict(self.get_booster(), data, flag=flag, **kw)
+
+           if len(prediction.shape) == 2 and prediction.shape[1] == self.n_classes_:
+               # multi-class
+               return prediction
+           if (
+               len(prediction.shape) == 2
+               and self.n_classes_ == 2
+               and prediction.shape[1] >= self.n_classes_
+           ):
+               # multi-label
+               return prediction
+           # binary logistic function
+           classone_probs = prediction
+           classzero_probs = 1.0 - classone_probs
+           return transpose(vstack((classzero_probs, classone_probs)))
+
+       @property
+       def classes_(self) -> np.ndarray:
+           return np.arange(self.n_classes_)
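
The binary branch at the end of `predict_proba` assembles scikit-learn's `(n_samples, 2)` probability layout from the single positive-class column that XGBoost emits for binary logistic objectives. The shape arithmetic, verified in plain NumPy (maxframe's `vstack`/`transpose` follow NumPy semantics):

    import numpy as np

    classone_probs = np.array([0.9, 0.2, 0.6])  # P(class 1) per sample
    classzero_probs = 1.0 - classone_probs      # P(class 0) per sample
    proba = np.transpose(np.vstack((classzero_probs, classone_probs)))
    assert proba.shape == (3, 2)                # (n_samples, n_classes)
    assert np.allclose(proba.sum(axis=1), 1.0)  # each row is a distribution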
maxframe/learn/contrib/xgboost/core.py

@@ -12,15 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 try:
     import xgboost
 except ImportError:
     xgboost = None
 
+from ...core import Model, ModelData
 from .dmatrix import DMatrix
 
+
+class BoosterData(ModelData):
+    __slots__ = ("_evals_result",)
+
+    _evals_result: Dict
+
+    def __init__(self, *args, evals_result=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._evals_result = evals_result if evals_result is not None else dict()
+
+    def execute(self, session=None, **kw):
+        # The evals_result should be fetched when BoosterData.execute() is called.
+        result = super().execute(session=session, **kw)
+        if self.op.has_evals_result and self.key == self.op.outputs[0].key:
+            self._evals_result.update(self.op.outputs[1].fetch(session=session))
+        return result
+
+    def predict(
+        self,
+        data,
+        output_margin=False,
+        pred_leaf=False,
+        pred_contribs=False,
+        approx_contribs=False,
+        pred_interactions=False,
+        validate_features=True,
+        training=False,
+        iteration_range=None,
+        strict_shape=False,
+    ):
+        from .predict import predict
+
+        return predict(
+            self,
+            data,
+            output_margin=output_margin,
+            pred_leaf=pred_leaf,
+            pred_contribs=pred_contribs,
+            approx_contribs=approx_contribs,
+            pred_interactions=pred_interactions,
+            validate_features=validate_features,
+            training=training,
+            iteration_range=iteration_range,
+            strict_shape=strict_shape,
+        )
+
+
+class Booster(Model):
+    pass
+
+
 if not xgboost:
     XGBScikitLearnBase = None
 else:
@@ -40,7 +92,9 @@ else:
            **kw,
        ):
            """
-           Fit the regressor.
+           Fit the regressor. Note that fit() is an eager-execution
+           API. The call will be blocked until training finished.
+
            Parameters
            ----------
            X : array_like
@@ -72,6 +126,37 @@ else:
            """
            raise NotImplementedError
 
+       def evals_result(self, **kw) -> Dict:
+           """Return the evaluation results.
+
+           If **eval_set** is passed to the :py:meth:`fit` function, you can call
+           ``evals_result()`` to get evaluation results for all passed **eval_sets**. When
+           **eval_metric** is also passed to the :py:meth:`fit` function, the
+           **evals_result** will contain the **eval_metrics** passed to the :py:meth:`fit`
+           function.
+
+           The returned evaluation result is a dictionary:
+
+           .. code-block:: python
+
+               {'validation_0': {'logloss': ['0.604835', '0.531479']},
+                'validation_1': {'logloss': ['0.41965', '0.17686']}}
+
+           Note that evals_result() will be blocked until the train is finished.
+
+           Returns
+           -------
+           evals_result
+
+           """
+           result = super().evals_result()
+           if not self._Booster.op.has_evals_result or len(result) != 0:
+               return result
+           session = kw.pop("session", None)
+           run_kwargs = kw.pop("run_kwargs", dict())
+           self._Booster.execute(session=session, **run_kwargs)
+           return super().evals_result()
+
    def wrap_evaluation_matrices(
        missing: float,
        X: Any,
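
Taken together with the classifier changes, training stays eager while evaluation metrics are fetched on demand: `evals_result()` triggers one `Booster.execute()` if results exist but have not been fetched yet. A hedged usage sketch; the import path and `XGBClassifier` name are inferred from the file layout (not confirmed by this diff), a configured MaxCompute session is assumed, and the train/validation splits are prepared elsewhere:

    from maxframe.learn.contrib.xgboost import XGBClassifier  # assumed path

    clf = XGBClassifier(n_estimators=10)
    clf.fit(X_train, y_train, eval_set=[(X_val, y_val)])  # blocks until trained
    print(clf.evals_result())  # e.g. {"validation_0": {"logloss": [...]}}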
maxframe/learn/contrib/xgboost/dmatrix.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core.entity.output_types import get_output_types
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
@@ -27,7 +27,7 @@ from ...utils import convert_to_tensor_or_dataframe
 
 
 class ToDMatrix(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.TO_DMATRIX
+    _op_type_ = opcodes.TO_DMATRIX
 
     data = KeyField("data", default=None)
     label = KeyField("label", default=None)
@@ -99,10 +99,7 @@ def check_array_like(y: TileableType, name: str) -> TileableType:
     y = convert_to_tensor_or_dataframe(y)
     if isinstance(y, DATAFRAME_TYPE):
         y = y.iloc[:, 0]
-    y = astensor(y)
-    if y.ndim != 1:
-        raise ValueError(f"Expecting 1-d {name}, got: {y.ndim}-d")
-    return y
+    return astensor(y)
 
 
 def to_dmatrix(
maxframe/learn/contrib/xgboost/predict.py

@@ -12,29 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pickle
 
 import numpy as np
 import pandas as pd
 
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core.entity.output_types import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
 from ....dataframe.utils import parse_index
-from ....serialization.serializables import BoolField, BytesField, KeyField, TupleField
+from ....serialization.serializables import (
+    BoolField,
+    KeyField,
+    ReferenceField,
+    TupleField,
+)
 from ....tensor.core import TENSOR_TYPE, TensorOrder
+from .core import BoosterData
 from .dmatrix import check_data
 
 
 class XGBPredict(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.XGBOOST_PREDICT
+    _op_type_ = opcodes.XGBOOST_PREDICT
     output_dtype = np.dtype(np.float32)
 
     data = KeyField("data", default=None)
-    model = BytesField(
-        "model", on_serialize=pickle.dumps, on_deserialize=pickle.loads, default=None
-    )
+    model = ReferenceField("model", reference_type=BoosterData, default=None)
     pred_leaf = BoolField("pred_leaf", default=False)
     pred_contribs = BoolField("pred_contribs", default=False)
     approx_contribs = BoolField("approx_contribs", default=False)
@@ -107,6 +110,17 @@ def predict(
     strict_shape=False,
     flag=False,
 ):
+    """
+    Using MaxFrame XGBoost model to predict data.
+
+    Parameters
+    ----------
+    Parameters are the same as `xgboost.train`. The predict() is lazy-execution mode.
+
+    Returns
+    -------
+    results: Booster
+    """
     data = check_data(data)
     # TODO: check model datatype
maxframe/learn/contrib/xgboost/regressor.py

@@ -41,11 +41,6 @@ else:
        ):
            session = kw.pop("session", None)
            run_kwargs = kw.pop("run_kwargs", dict())
-           if kw:
-               raise TypeError(
-                   f"fit got an unexpected keyword argument '{next(iter(kw))}'"
-               )
-
            dtrain, evals = wrap_evaluation_matrices(
                None,
                X,
@@ -57,6 +52,8 @@ else:
                base_margin_eval_set,
            )
            params = self.get_xgb_params()
+           if not params.get("objective"):
+               params["objective"] = "reg:squarederror"
            self.evals_result_ = dict()
            result = train(
                params,
@@ -71,8 +68,4 @@ else:
            return self
 
        def predict(self, data, **kw):
-           session = kw.pop("session", None)
-           run_kwargs = kw.pop("run_kwargs", None)
-           return predict(
-               self.get_booster(), data, session=session, run_kwargs=run_kwargs, **kw
-           )
+           return predict(self.get_booster(), data, **kw)
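
Two behavior changes here: `fit()` no longer raises `TypeError` on unexpected keyword arguments, and `predict()` now forwards all of its keyword arguments (including `session` and `run_kwargs`) straight to the module-level `predict()`. The regressor also fills in XGBoost's regression default when no objective is set; that guard in isolation, runnable anywhere:

    def apply_default_objective(params: dict) -> dict:
        # mirrors the check added in fit(): fill only when missing or empty
        if not params.get("objective"):
            params["objective"] = "reg:squarederror"
        return params

    assert apply_default_objective({})["objective"] == "reg:squarederror"
    assert apply_default_objective({"objective": "reg:logistic"})["objective"] == "reg:logistic"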