maxframe 1.0.0rc3__cp311-cp311-win_amd64.whl → 1.0.0rc4__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (57)
  1. maxframe/_utils.cp311-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +13 -1
  4. maxframe/conftest.py +43 -12
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
  7. maxframe/dataframe/arithmetic/docstring.py +26 -2
  8. maxframe/dataframe/arithmetic/equal.py +4 -2
  9. maxframe/dataframe/arithmetic/greater.py +4 -2
  10. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  11. maxframe/dataframe/arithmetic/less.py +2 -2
  12. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  13. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  14. maxframe/dataframe/core.py +2 -0
  15. maxframe/dataframe/datasource/read_odps_query.py +66 -7
  16. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  17. maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
  18. maxframe/dataframe/datastore/to_odps.py +7 -0
  19. maxframe/dataframe/extensions/__init__.py +3 -0
  20. maxframe/dataframe/extensions/flatmap.py +326 -0
  21. maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
  22. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  23. maxframe/dataframe/indexing/rename.py +11 -0
  24. maxframe/dataframe/initializer.py +11 -1
  25. maxframe/dataframe/misc/drop_duplicates.py +18 -1
  26. maxframe/dataframe/tests/test_initializer.py +33 -2
  27. maxframe/io/odpsio/schema.py +5 -3
  28. maxframe/io/odpsio/tableio.py +44 -38
  29. maxframe/io/odpsio/tests/test_schema.py +0 -4
  30. maxframe/io/odpsio/volumeio.py +9 -3
  31. maxframe/learn/contrib/__init__.py +2 -1
  32. maxframe/learn/contrib/graph/__init__.py +15 -0
  33. maxframe/learn/contrib/graph/connected_components.py +215 -0
  34. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  35. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  36. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  37. maxframe/learn/contrib/xgboost/predict.py +8 -39
  38. maxframe/learn/contrib/xgboost/train.py +4 -3
  39. maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
  40. maxframe/opcodes.py +3 -0
  41. maxframe/protocol.py +6 -1
  42. maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
  43. maxframe/session.py +9 -2
  44. maxframe/tensor/indexing/getitem.py +2 -0
  45. maxframe/tensor/merge/concatenate.py +23 -20
  46. maxframe/tensor/merge/vstack.py +5 -1
  47. maxframe/tensor/misc/transpose.py +1 -1
  48. maxframe/utils.py +34 -12
  49. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/METADATA +1 -1
  50. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +57 -52
  51. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +1 -1
  52. maxframe_client/fetcher.py +10 -8
  53. maxframe_client/session/consts.py +3 -0
  54. maxframe_client/session/odps.py +84 -13
  55. maxframe_client/session/task.py +58 -20
  56. maxframe_client/tests/test_session.py +14 -2
  57. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
maxframe/io/odpsio/tableio.py

@@ -18,6 +18,7 @@ from abc import ABC, abstractmethod
  from contextlib import contextmanager
  from typing import Dict, List, Optional, Union

+ import numpy as np
  import pyarrow as pa
  from odps import ODPS
  from odps import __version__ as pyodps_version
@@ -26,7 +27,6 @@ from odps.apis.storage_api import (
      TableBatchScanResponse,
      TableBatchWriteResponse,
  )
- from odps.config import option_context as pyodps_option_context
  from odps.tunnel import TableTunnel
  from odps.types import OdpsSchema, PartitionSpec, timestamp_ntz

@@ -38,19 +38,13 @@ except ImportError:
  from ...config import options
  from ...env import ODPS_STORAGE_API_ENDPOINT
  from ...lib.version import Version
+ from ...utils import sync_pyodps_options
  from .schema import odps_schema_to_arrow_schema

  PartitionsType = Union[List[str], str, None]

  _DEFAULT_ROW_BATCH_SIZE = 4096
- _need_convert_timezone = Version(pyodps_version) < Version("0.11.7")
-
-
- @contextmanager
- def _sync_pyodps_timezone():
-     with pyodps_option_context() as cfg:
-         cfg.local_timezone = options.local_timezone
-         yield
+ _need_patch_batch = Version(pyodps_version) < Version("0.12.0")


  class ODPSTableIO(ABC):
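
The removed `_sync_pyodps_timezone` helper above is superseded by a shared `sync_pyodps_options` utility imported from `maxframe/utils.py` (also changed in this release). Its implementation is not shown in this diff; a minimal sketch, assuming it simply generalizes the removed timezone-only helper on top of the same PyODPS `option_context` API, could look like this:

from contextlib import contextmanager

from odps.config import option_context as pyodps_option_context

from maxframe.config import options


@contextmanager
def sync_pyodps_options():
    # Mirror MaxFrame options into PyODPS options for the duration of the block.
    # The real helper in maxframe/utils.py may sync more options than the timezone.
    with pyodps_option_context() as cfg:
        cfg.local_timezone = options.local_timezone
        yield
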
@@ -166,10 +160,15 @@ class TunnelMultiPartitionReader:
              self._cur_partition_id += 1

              part_str = self._partitions[self._cur_partition_id]
-             with _sync_pyodps_timezone():
+
+             # todo make this more formal when PyODPS 0.12.0 is released
+             req_columns = self._columns
+             if not _need_patch_batch:
+                 req_columns = self._schema.names
+             with sync_pyodps_options():
                  self._cur_reader = self._table.open_reader(
                      part_str,
-                     columns=self._columns,
+                     columns=req_columns,
                      arrow=True,
                      download_id=self._partition_to_download_ids.get(part_str),
                  )
@@ -180,7 +179,7 @@ class TunnelMultiPartitionReader:
              else:
                  count = min(self._count, self._cur_reader.count - start)

-             with _sync_pyodps_timezone():
+             with sync_pyodps_options():
                  self._reader_iter = self._cur_reader.read(start, count)
              break
          self._reader_start_pos += self._cur_reader.count
@@ -194,7 +193,7 @@ class TunnelMultiPartitionReader:
          arrays = []
          for idx in range(batch.num_columns):
              col = batch.column(idx)
-             if _need_convert_timezone and isinstance(col.type, pa.TimestampType):
+             if isinstance(col.type, pa.TimestampType):
                  if col.type.tz is not None:
                      target_type = pa.timestamp(
                          self._schema.types[idx].unit, col.type.tz
@@ -212,11 +211,12 @@ class TunnelMultiPartitionReader:
          for part_col in self._partition_cols or []:
              names.append(part_col)
              col_type = self._schema.field_by_name(part_col).type
-             arrays.append(pa.array([pt_spec[part_col]] * batch.num_rows).cast(col_type))
+             pt_col = np.repeat([pt_spec[part_col]], batch.num_rows)
+             arrays.append(pa.array(pt_col).cast(col_type))
          return pa.RecordBatch.from_arrays(arrays, names)

      def read(self):
-         with _sync_pyodps_timezone():
+         with sync_pyodps_options():
              if self._cur_reader is None:
                  self._open_next_reader()
                  if self._cur_reader is None:
@@ -227,7 +227,10 @@ class TunnelMultiPartitionReader:
                  if batch is not None:
                      if self._row_left is not None:
                          self._row_left -= batch.num_rows
-                     return self._fill_batch_partition(batch)
+                     if _need_patch_batch:
+                         return self._fill_batch_partition(batch)
+                     else:
+                         return batch
              except StopIteration:
                  self._open_next_reader()
          return None
@@ -285,7 +288,9 @@ class TunnelTableIO(ODPSTableIO):
          reverse_range: bool = False,
          row_batch_size: int = _DEFAULT_ROW_BATCH_SIZE,
      ):
-         table = self._odps.get_table(full_table_name)
+         with sync_pyodps_options():
+             table = self._odps.get_table(full_table_name)
+
          if partition_columns is True:
              partition_columns = [c.name for c in table.table_schema.partitions]

@@ -296,21 +301,22 @@ class TunnelTableIO(ODPSTableIO):
              or (stop is not None and stop < 0)
              or (reverse_range and start is None)
          ):
-             table = self._odps.get_table(full_table_name)
-             tunnel = TableTunnel(self._odps)
-             parts = (
-                 [partitions]
-                 if partitions is None or isinstance(partitions, str)
-                 else partitions
-             )
-             part_to_down_id = dict()
-             total_records = 0
-             for part in parts:
-                 down_session = tunnel.create_download_session(
-                     table, async_mode=True, partition_spec=part
+             with sync_pyodps_options():
+                 table = self._odps.get_table(full_table_name)
+                 tunnel = TableTunnel(self._odps)
+                 parts = (
+                     [partitions]
+                     if partitions is None or isinstance(partitions, str)
+                     else partitions
                  )
-                 part_to_down_id[part] = down_session.id
-                 total_records += down_session.count
+                 part_to_down_id = dict()
+                 total_records = 0
+                 for part in parts:
+                     down_session = tunnel.create_download_session(
+                         table, async_mode=True, partition_spec=part
+                     )
+                     part_to_down_id[part] = down_session.id
+                     total_records += down_session.count

          count = None
          if start is not None or stop is not None:
@@ -347,7 +353,7 @@ class TunnelTableIO(ODPSTableIO):
          overwrite: bool = True,
      ):
          table = self._odps.get_table(full_table_name)
-         with _sync_pyodps_timezone():
+         with sync_pyodps_options():
              with table.open_writer(
                  partition=partition,
                  arrow=True,
@@ -357,7 +363,7 @@ class TunnelTableIO(ODPSTableIO):
              # fixme should yield writer directly once pyodps fixes
              # related arrow timestamp bug when provided schema and
              # table schema is identical.
-             if _need_convert_timezone:
+             if _need_patch_batch:
                  yield TunnelWrappedWriter(writer)
              else:
                  yield writer
@@ -596,8 +602,8 @@ class HaloTableIO(ODPSTableIO):
      ):
          from odps.apis.storage_api import (
              SessionRequest,
+             SessionStatus,
              SplitOptions,
-             Status,
              TableBatchScanRequest,
          )

@@ -628,13 +634,13 @@ class HaloTableIO(ODPSTableIO):
          resp = client.create_read_session(req)

          session_id = resp.session_id
-         status = resp.status
-         while status == Status.WAIT:
+         status = resp.session_status
+         while status == SessionStatus.INIT:
              resp = client.get_read_session(SessionRequest(session_id))
-             status = resp.status
+             status = resp.session_status
              time.sleep(1.0)

-         assert status == Status.OK
+         assert status == SessionStatus.NORMAL

          count = None
          if start is not None or stop is not None:
maxframe/io/odpsio/tests/test_schema.py

@@ -270,10 +270,6 @@ def test_odps_arrow_schema_conversion():

      with pytest.raises(TypeError):
          arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
-     with pytest.raises(TypeError):
-         odps_schema_to_arrow_schema(
-             odps_types.OdpsSchema([odps_types.Column("col1", "json")])
-         )


  def test_build_column_name():
maxframe/io/odpsio/volumeio.py

@@ -13,7 +13,7 @@
  # limitations under the License.

  import inspect
- from typing import Iterator, List, Union
+ from typing import Iterator, List, Optional, Union

  from odps import ODPS

@@ -43,9 +43,15 @@ class ODPSVolumeReader:


  class ODPSVolumeWriter:
-     def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
+     def __init__(
+         self,
+         odps_entry: ODPS,
+         volume_name: str,
+         volume_dir: str,
+         schema_name: Optional[str] = None,
+     ):
          self._odps_entry = odps_entry
-         self._volume = odps_entry.get_volume(volume_name)
+         self._volume = odps_entry.get_volume(volume_name, schema=schema_name)
          self._volume_dir = volume_dir

      def write_file(self, file_name: str, data: Union[bytes, Iterator[bytes]]):
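
The writer now threads an optional `schema_name` through to `odps_entry.get_volume`. A usage sketch follows; the project, volume, directory and schema names are placeholders, the credentials come from the standard PyODPS environment variables, and the import path is taken directly from the file shown above:

import os

from odps import ODPS

from maxframe.io.odpsio.volumeio import ODPSVolumeWriter

o = ODPS(
    os.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"),
    os.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"),
    project="my_project",
    endpoint="https://service.odps.example.com/api",
)
# schema_name is the new optional argument; omitting it keeps the old behavior
writer = ODPSVolumeWriter(o, "my_volume", "my_dir", schema_name="my_schema")
writer.write_file("part-0.bin", b"some bytes")
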
maxframe/learn/contrib/__init__.py

@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from . import pytorch
+ from . import graph, pytorch

  del pytorch
+ del graph
maxframe/learn/contrib/graph/__init__.py

@@ -0,0 +1,15 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .connected_components import connected_components
maxframe/learn/contrib/graph/connected_components.py

@@ -0,0 +1,215 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import numpy as np
+ import pandas as pd
+
+ from maxframe import opcodes
+
+ from ....core import OutputType
+ from ....dataframe.operators import DataFrameOperator, DataFrameOperatorMixin
+ from ....dataframe.utils import make_dtypes, parse_index
+ from ....serialization.serializables import Int32Field, StringField
+
+
+ class DataFrameConnectedComponentsOperator(DataFrameOperator, DataFrameOperatorMixin):
+     _op_type_ = opcodes.CONNECTED_COMPONENTS
+
+     vertex_col1 = StringField("vertex_col1", default=None)
+     vertex_col2 = StringField("vertex_col2", default=None)
+     max_iter = Int32Field("max_iter", default=6)
+
+     def __call__(self, df):
+         node_id_dtype = df.dtypes[self.vertex_col1]
+         dtypes = make_dtypes({"id": node_id_dtype, "component": node_id_dtype})
+         # this will return a dataframe and a bool flag
+         new_dataframe_tileable_kw = {
+             "shape": (np.nan, 2),
+             "index_value": parse_index(pd.RangeIndex(0)),
+             "columns_value": parse_index(dtypes.index, store_data=True),
+             "dtypes": dtypes,
+         }
+         new_scalar_tileable_kw = {"dtype": np.dtype(np.bool_), "shape": ()}
+         return self.new_tileables(
+             [df],
+             kws=[new_dataframe_tileable_kw, new_scalar_tileable_kw],
+         )
+
+     @property
+     def output_limit(self):
+         return 2
+
+
+ def connected_components(
+     dataframe, vertex_col1: str, vertex_col2: str, max_iter: int = 6
+ ):
+     """
+     The connected components algorithm labels each node as belonging to a specific connected component,
+     identified by the ID of its lowest-numbered vertex.
+
+     Parameters
+     ----------
+     dataframe : DataFrame
+         A DataFrame containing the edges of the graph.
+
+     vertex_col1 : str
+         The name of the column in `dataframe` that contains one of the edge vertices. The column values must be
+         integers.
+
+     vertex_col2 : str
+         The name of the column in `dataframe` that contains the other edge vertex. The column values must be
+         integers.
+
+     max_iter : int
+         The algorithm uses large-star and small-star transformations to find all connected components; `max_iter`
+         controls the maximum number of iteration rounds used to find all edges. Default is 6.
+
+     Returns
+     -------
+     DataFrame
+         A DataFrame containing all connected component edges in two columns, `id` and `component`, where
+         `component` is the lowest-numbered vertex of the connected component.
+
+     Notes
+     -----
+     After `execute()`, the DataFrame has a bool member `flag` indicating whether `connected_components` has
+     already converged within `max_iter` rounds. `True` means the DataFrame already contains all edges of the
+     connected components. If `False`, you can run `connected_components` again to reach the converged state.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> import maxframe.dataframe as md
+     >>> from maxframe.learn.contrib.graph import connected_components
+     >>> df = md.DataFrame({'x': [4, 1], 'y': [0, 4]})
+     >>> df.execute()
+        x  y
+     0  4  0
+     1  1  4
+
+     Get connected components with 1 round of iteration.
+
+     >>> components, converged = connected_components(df, "x", "y", 1)
+     >>> session.execute(components, converged)
+     >>> components
+        id  component
+     0   1          0
+     1   4          0
+
+     >>> converged
+     True
+
+     Sometimes a single iteration is not sufficient to propagate the connectivity of all edges.
+     By default, `connected_components` performs 6 iterations.
+     If you are unsure whether the connected components have converged, check the `flag` variable of
+     the output DataFrame after calling `execute()`.
+
+     >>> df = md.DataFrame({'x': [4, 1, 7, 5, 8, 11, 11], 'y': [0, 4, 4, 7, 7, 9, 13]})
+     >>> df.execute()
+         x   y
+     0   4   0
+     1   1   4
+     2   7   4
+     3   5   7
+     4   8   7
+     5  11   9
+     6  11  13
+
+     >>> components, converged = connected_components(df, "x", "y", 1)
+     >>> session.execute(components, converged)
+     >>> components
+         id  component
+     0    4          0
+     1    7          0
+     2    8          4
+     3   13          9
+     4    1          0
+     5    5          0
+     6   11          9
+
+     If `flag` is `True`, convergence has been achieved; here it is still `False`:
+
+     >>> converged
+     False
+
+     You can then decide whether to continue iterating or to start with a larger number of iterations
+     (though an excessively large value wastes computation).
+
+     >>> components, converged = connected_components(components, "id", "component", 1)
+     >>> session.execute(components, converged)
+     >>> components
+         id  component
+     0    4          0
+     1    7          0
+     2   13          9
+     3    1          0
+     4    5          0
+     5   11          9
+     6    8          0
+
+     >>> components, converged = connected_components(df, "x", "y")
+     >>> session.execute(components, converged)
+     >>> components
+         id  component
+     0    4          0
+     1    7          0
+     2   13          9
+     3    1          0
+     4    5          0
+     5   11          9
+     6    8          0
+     """
+
+     # Check if vertex columns are provided
+     if not vertex_col1 or not vertex_col2:
+         raise ValueError("Both vertex_col1 and vertex_col2 must be provided.")
+
+     # Check if max_iter is provided and within the valid range
+     if max_iter is None:
+         raise ValueError("max_iter must be provided.")
+     if not (1 <= max_iter <= 50):
+         raise ValueError("max_iter must be an integer between 1 and 50.")
+
+     # Verify that the vertex columns exist in the dataframe
+     missing_cols = [
+         col for col in (vertex_col1, vertex_col2) if col not in dataframe.dtypes
+     ]
+     if missing_cols:
+         raise ValueError(
+             f"The following required columns {missing_cols} are not in {list(dataframe.dtypes.index)}"
+         )
+
+     # Ensure that the vertex columns are of integer type
+     # TODO support string dtype
+     incorrect_dtypes = [
+         col
+         for col in (vertex_col1, vertex_col2)
+         if dataframe[col].dtype != np.dtype("int")
+     ]
+     if incorrect_dtypes:
+         dtypes_str = ", ".join(str(dataframe[col].dtype) for col in incorrect_dtypes)
+         raise ValueError(
+             f"Columns {incorrect_dtypes} should be of integer type, but found {dtypes_str}."
+         )
+
+     op = DataFrameConnectedComponentsOperator(
+         vertex_col1=vertex_col1,
+         vertex_col2=vertex_col2,
+         _output_types=[OutputType.dataframe, OutputType.scalar],
+         max_iter=max_iter,
+     )
+     return op(
+         dataframe,
+     )
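
Putting the docstring above together, the intended usage is to re-run `connected_components` on its own output until the convergence flag turns true. A minimal sketch, assuming an already-created MaxFrame `session` object as in the docstring examples and the Mars-style `fetch()` call to pull the scalar flag back to the client:

import maxframe.dataframe as md

from maxframe.learn.contrib.graph import connected_components

# `session` is assumed to be an existing MaxFrame session, as in the docstring examples
df = md.DataFrame({"x": [4, 1, 7, 5, 8, 11, 11], "y": [0, 4, 4, 7, 7, 9, 13]})

# one round at a time; keep feeding the result back in until converged
components, converged = connected_components(df, "x", "y", max_iter=1)
session.execute(components, converged)
while not converged.fetch():
    components, converged = connected_components(
        components, "id", "component", max_iter=1
    )
    session.execute(components, converged)
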
maxframe/learn/contrib/graph/tests/__init__.py

@@ -0,0 +1,13 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
maxframe/learn/contrib/graph/tests/test_connected_components.py

@@ -0,0 +1,53 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #      http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import numpy as np
+ import pytest
+
+ from ..... import dataframe as md
+ from .....dataframe.core import DataFrameData
+ from .....tensor.core import TensorData
+ from .. import connected_components
+
+
+ @pytest.fixture
+ def df1():
+     return md.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
+
+
+ @pytest.fixture
+ def df2():
+     return md.DataFrame(
+         [[1, "2"], [1, "2"]],
+         columns=["a", "b"],
+     )
+
+
+ def test_connected_components(df1, df2):
+     edges, flag = connected_components(df1, "a", "b")
+     assert edges.op.max_iter == 6
+     assert edges.shape == (np.nan, 2)
+     assert isinstance(edges.data, DataFrameData)
+     assert isinstance(flag.data, TensorData)
+     assert flag.shape == ()
+     assert "id" in edges.dtypes and "component" in edges.dtypes
+
+     with pytest.raises(ValueError):
+         connected_components(df1, "a", "x")
+
+     with pytest.raises(ValueError):
+         connected_components(df1, "a", "b", 0)
+
+     with pytest.raises(ValueError):
+         connected_components(df2, "a", "b")
maxframe/learn/contrib/xgboost/classifier.py

@@ -14,7 +14,8 @@

  import numpy as np

- from ....tensor import argmax, transpose, vstack
+ from ....tensor import argmax, transpose
+ from ....tensor.merge.vstack import _vstack
  from ..utils import make_import_error_func
  from .core import XGBScikitLearnBase, xgboost

@@ -89,7 +90,6 @@ else:
          if ntree_limit is not None:
              raise NotImplementedError("ntree_limit is not currently supported")
          prediction = predict(self.get_booster(), data, flag=flag, **kw)
-
          if len(prediction.shape) == 2 and prediction.shape[1] == self.n_classes_:
              # multi-class
              return prediction
@@ -103,7 +103,7 @@ else:
              # binary logistic function
              classone_probs = prediction
              classzero_probs = 1.0 - classone_probs
-             return transpose(vstack((classzero_probs, classone_probs)))
+             return transpose(_vstack((classzero_probs, classone_probs)))

      @property
      def classes_(self) -> np.ndarray:
maxframe/learn/contrib/xgboost/predict.py

@@ -14,20 +14,18 @@


  import numpy as np
- import pandas as pd

  from .... import opcodes
  from ....core.entity.output_types import OutputType
  from ....core.operator.base import Operator
  from ....core.operator.core import TileableOperatorMixin
- from ....dataframe.utils import parse_index
  from ....serialization.serializables import (
      BoolField,
      KeyField,
      ReferenceField,
      TupleField,
  )
- from ....tensor.core import TENSOR_TYPE, TensorOrder
+ from ....tensor.core import TensorOrder
  from .core import BoosterData
  from .dmatrix import check_data

@@ -65,35 +63,12 @@ class XGBPredict(Operator, TileableOperatorMixin):
          else:
              shape = (self.data.shape[0],)
          inputs = [self.data, self.model]
-         if self.output_types[0] == OutputType.tensor:
-             # tensor
-             return self.new_tileable(
-                 inputs,
-                 shape=shape,
-                 dtype=self.output_dtype,
-                 order=TensorOrder.C_ORDER,
-             )
-         elif self.output_types[0] == OutputType.dataframe:
-             # dataframe
-             dtypes = pd.DataFrame(
-                 np.random.rand(0, num_class), dtype=self.output_dtype
-             ).dtypes
-             return self.new_tileable(
-                 inputs,
-                 shape=shape,
-                 dtypes=dtypes,
-                 columns_value=parse_index(dtypes.index),
-                 index_value=self.data.index_value,
-             )
-         else:
-             # series
-             return self.new_tileable(
-                 inputs,
-                 shape=shape,
-                 index_value=self.data.index_value,
-                 name="predictions",
-                 dtype=self.output_dtype,
-             )
+         return self.new_tileable(
+             inputs,
+             shape=shape,
+             dtype=self.output_dtype,
+             order=TensorOrder.C_ORDER,
+         )


  def predict(
@@ -124,13 +99,7 @@ def predict(
      data = check_data(data)
      # TODO: check model datatype

-     num_class = getattr(model.op, "num_class", None)
-     if isinstance(data, TENSOR_TYPE):
-         output_types = [OutputType.tensor]
-     elif num_class is not None:
-         output_types = [OutputType.dataframe]
-     else:
-         output_types = [OutputType.series]
+     output_types = [OutputType.tensor]

      iteration_range = iteration_range or (0, 0)

maxframe/learn/contrib/xgboost/train.py

@@ -102,7 +102,7 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
      Parameters
      ----------
      Parameters are the same as `xgboost.train`. Note that train is an eager-execution
-     API. The call will be blocked until training finished.
+     API if evals is passed, thus the call will be blocked until training finished.

      Returns
      -------
@@ -121,11 +121,12 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
              processed_evals.append((eval_dmatrix, name))
          else:
              processed_evals.append((to_dmatrix(eval_dmatrix), name))
-     return XGBTrain(
+     data = XGBTrain(
          params=params,
          dtrain=dtrain,
          evals=processed_evals,
          evals_result=evals_result,
          num_class=num_class,
          **kwargs,
-     )(evals_result).execute(session=session, **run_kwargs)
+     )(evals_result)
+     return data.execute(session=session, **run_kwargs) if evals else data
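
With this change `train` only blocks when `evals` is passed; otherwise it returns a lazy Booster tileable that can be executed later. A rough sketch of the two calling patterns, assuming `train` and `to_dmatrix` are importable from `maxframe.learn.contrib.xgboost` as the diff suggests and that the data construction below is purely illustrative:

import maxframe.dataframe as md

from maxframe.learn.contrib.xgboost import train
from maxframe.learn.contrib.xgboost.dmatrix import to_dmatrix

df = md.DataFrame({"f0": [1.0, 2.0, 3.0, 4.0], "label": [0, 1, 0, 1]})
dtrain = to_dmatrix(df[["f0"]], label=df["label"])

# no evals: returns a lazy booster, nothing runs yet
booster = train({"objective": "binary:logistic"}, dtrain)

# with evals: train() calls execute() itself and blocks until training finishes
evals_result = {}
booster = train(
    {"objective": "binary:logistic"},
    dtrain,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
)
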
Binary file