PyPI - maxframe - Versions diffs - 0.1.0b5__cp310-cp310-macosx_10_9_universal2.whl → 1.0.0rc2__cp310-cp310-macosx_10_9_universal2.whl - Mend

maxframe 0.1.0b5__cp310-cp310-macosx_10_9_universal2.whl → 1.0.0rc2__cp310-cp310-macosx_10_9_universal2.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (92)

maxframe/_utils.cpython-310-darwin.so +0 -0
maxframe/codegen.py +6 -2
maxframe/config/config.py +38 -2
maxframe/config/validators.py +1 -0
maxframe/conftest.py +2 -0
maxframe/core/__init__.py +0 -3
maxframe/core/entity/__init__.py +1 -8
maxframe/core/entity/objects.py +3 -45
maxframe/core/graph/core.cpython-310-darwin.so +0 -0
maxframe/core/graph/core.pyx +4 -4
maxframe/dataframe/__init__.py +1 -1
maxframe/dataframe/arithmetic/around.py +5 -17
maxframe/dataframe/arithmetic/core.py +15 -7
maxframe/dataframe/arithmetic/docstring.py +5 -55
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
maxframe/dataframe/core.py +5 -5
maxframe/dataframe/datasource/date_range.py +2 -2
maxframe/dataframe/datasource/read_odps_query.py +6 -0
maxframe/dataframe/datasource/read_odps_table.py +2 -1
maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
maxframe/dataframe/datastore/tests/__init__.py +13 -0
maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
maxframe/dataframe/datastore/to_odps.py +21 -0
maxframe/dataframe/groupby/cum.py +0 -1
maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
maxframe/dataframe/indexing/align.py +1 -1
maxframe/dataframe/indexing/rename.py +3 -37
maxframe/dataframe/indexing/sample.py +0 -1
maxframe/dataframe/indexing/set_index.py +68 -1
maxframe/dataframe/merge/merge.py +236 -2
maxframe/dataframe/merge/tests/test_merge.py +123 -0
maxframe/dataframe/misc/apply.py +5 -10
maxframe/dataframe/misc/case_when.py +1 -1
maxframe/dataframe/misc/describe.py +2 -2
maxframe/dataframe/misc/drop_duplicates.py +4 -25
maxframe/dataframe/misc/eval.py +4 -0
maxframe/dataframe/misc/memory_usage.py +2 -2
maxframe/dataframe/misc/pct_change.py +1 -83
maxframe/dataframe/misc/tests/test_misc.py +23 -0
maxframe/dataframe/misc/transform.py +1 -30
maxframe/dataframe/misc/value_counts.py +4 -17
maxframe/dataframe/missing/dropna.py +1 -1
maxframe/dataframe/missing/fillna.py +5 -5
maxframe/dataframe/sort/sort_values.py +1 -11
maxframe/dataframe/statistics/corr.py +3 -3
maxframe/dataframe/statistics/quantile.py +5 -17
maxframe/dataframe/utils.py +4 -7
maxframe/errors.py +13 -0
maxframe/extension.py +12 -0
maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
maxframe/learn/contrib/xgboost/predict.py +2 -2
maxframe/learn/contrib/xgboost/train.py +2 -2
maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
maxframe/lib/mmh3.pyi +43 -0
maxframe/lib/wrapped_pickle.py +2 -1
maxframe/odpsio/__init__.py +1 -1
maxframe/odpsio/arrow.py +8 -4
maxframe/odpsio/schema.py +10 -7
maxframe/odpsio/tableio.py +388 -14
maxframe/odpsio/tests/test_schema.py +16 -15
maxframe/odpsio/tests/test_tableio.py +48 -21
maxframe/protocol.py +148 -12
maxframe/serialization/core.cpython-310-darwin.so +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -0
maxframe/serialization/core.pyx +54 -25
maxframe/serialization/exception.py +1 -1
maxframe/serialization/pandas.py +7 -2
maxframe/serialization/serializables/core.py +158 -12
maxframe/serialization/serializables/tests/test_serializable.py +46 -4
maxframe/tensor/__init__.py +59 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
maxframe/tensor/base/atleast_1d.py +1 -1
maxframe/tensor/base/unique.py +3 -3
maxframe/tensor/reduction/count_nonzero.py +1 -1
maxframe/tensor/statistics/quantile.py +2 -2
maxframe/tests/test_protocol.py +34 -0
maxframe/tests/test_utils.py +0 -12
maxframe/tests/utils.py +11 -2
maxframe/utils.py +24 -13
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +75 -2
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +91 -89
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +1 -1
maxframe_client/__init__.py +0 -1
maxframe_client/fetcher.py +38 -27
maxframe_client/session/odps.py +50 -10
maxframe_client/session/task.py +41 -20
maxframe_client/tests/test_fetcher.py +21 -3
maxframe_client/tests/test_session.py +49 -2
maxframe_client/clients/spe.py +0 -104
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0

maxframe/dataframe/utils.py CHANGED Viewed

@@ -26,7 +26,6 @@ import numpy as np
 import pandas as pd
 from pandas.api.extensions import ExtensionDtype
 from pandas.api.types import is_string_dtype
-from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.inference import is_dict_like, is_list_like
 from ..core import Entity, ExecutableTuple
@@ -477,11 +476,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
     else:
         fill_values = fill_value
-    from .core import SERIES_TYPE
+    from .core import INDEX_TYPE, SERIES_TYPE
     dtypes = (
         pd.Series([df_obj.dtype], index=[df_obj.name])
-        if isinstance(df_obj, SERIES_TYPE)
+        if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
         else df_obj.dtypes
     )
     for size, fill_value in zip(sizes, fill_values):
@@ -593,7 +592,7 @@ def build_series(
     return ret_series
-def infer_index_value(left_index_value, right_index_value):
+def infer_index_value(left_index_value, right_index_value, level=None):
     from .core import IndexValue
     if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
@@ -616,9 +615,7 @@ def infer_index_value(left_index_value, right_index_value):
     left_index = left_index_value.to_pandas()
     right_index = right_index_value.to_pandas()
-    out_index = pd.Index(
-        [], dtype=find_common_type([left_index.dtype, right_index.dtype])
-    )
+    out_index = left_index.join(right_index, level=level)[:0]
     return parse_index(out_index, left_index_value, right_index_value)

maxframe/errors.py CHANGED Viewed

@@ -17,5 +17,18 @@ class MaxFrameError(Exception):
     pass
+class MaxFrameIntentionalError(MaxFrameError):
+    pass
 class MaxFrameUserError(MaxFrameError):
     pass
+class NoTaskServerResponseError(MaxFrameError):
+    pass
+class SessionAlreadyClosedError(MaxFrameError):
+    def __init__(self, session_id: str):
+        super().__init__(f"Session {session_id} is already closed")

maxframe/extension.py CHANGED Viewed

@@ -48,6 +48,18 @@ class MaxFrameExtension(metaclass=abc.ABCMeta):
         """
         pass
+    @classmethod
+    async def reload_session(cls, session_id: str) -> None:
+        """
+        Reload the session state when the session is recovered from failover.
+        Parameters
+        ----------
+        session_id : str
+            The session id.
+        """
+        pass
     @classmethod
     def init_service_extension(cls) -> None:
         """

maxframe/learn/contrib/xgboost/dmatrix.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core.entity.output_types import get_output_types
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
@@ -27,7 +27,7 @@ from ...utils import convert_to_tensor_or_dataframe
 class ToDMatrix(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.TO_DMATRIX
+    _op_type_ = opcodes.TO_DMATRIX
     data = KeyField("data", default=None)
     label = KeyField("label", default=None)

maxframe/learn/contrib/xgboost/predict.py CHANGED Viewed

@@ -17,7 +17,7 @@ import pickle
 import numpy as np
 import pandas as pd
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core.entity.output_types import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
@@ -28,7 +28,7 @@ from .dmatrix import check_data
 class XGBPredict(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.XGBOOST_PREDICT
+    _op_type_ = opcodes.XGBOOST_PREDICT
     output_dtype = np.dtype(np.float32)
     data = KeyField("data", default=None)

maxframe/learn/contrib/xgboost/train.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import logging
 from collections import OrderedDict
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
@@ -41,7 +41,7 @@ def _on_serialize_evals(evals_val):
 class XGBTrain(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.XGBOOST_TRAIN
+    _op_type_ = opcodes.XGBOOST_TRAIN
     params = DictField("params", key_type=FieldTypes.string, default=None)
     dtrain = KeyField("dtrain", default=None)

maxframe/lib/mmh3.cpython-310-darwin.so CHANGED Viewed

Binary file

maxframe/lib/mmh3.pyi ADDED Viewed

@@ -0,0 +1,43 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Tuple
+def hash(key, seed=0, signed=True) -> int:
+    """
+    Return a 32 bit integer.
+    """
+def hash_from_buffer(key, seed=0, signed=True) -> int:
+    """
+    Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
+    """
+def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
+    """
+    Return a tuple of two 64 bit integers for a string. Optimized for
+    the x64 bit architecture when x64arch=True, otherwise for x86.
+    """
+def hash128(key, seed=0, x64arch=True, signed=False) -> int:
+    """
+    Return a 128 bit long integer. Optimized for the x64 bit architecture
+    when x64arch=True, otherwise for x86.
+    """
+def hash_bytes(key, seed=0, x64arch=True) -> bytes:
+    """
+    Return a 128 bit hash value as bytes for a string. Optimized for the
+    x64 bit architecture when x64arch=True, otherwise for the x86.
+    """

maxframe/lib/wrapped_pickle.py CHANGED Viewed

@@ -120,7 +120,8 @@ class _UnpickleSwitch:
             @functools.wraps(func)
             async def wrapped(*args, **kwargs):
                 with _UnpickleSwitch(forbidden=self._forbidden):
-                    return await func(*args, **kwargs)
+                    ret = await func(*args, **kwargs)
+                return ret
         else:

maxframe/odpsio/__init__.py CHANGED Viewed

@@ -18,4 +18,4 @@ from .schema import (
     odps_schema_to_pandas_dtypes,
     pandas_to_odps_schema,
 )
-from .tableio import HaloTableIO
+from .tableio import HaloTableIO, ODPSTableIO

maxframe/odpsio/arrow.py CHANGED Viewed

@@ -45,9 +45,13 @@ def _rebuild_dataframe(
 def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index:
     if df.shape[1] > 1:
-        df.columns = pd.Index(table_meta.pd_index_level_names)
-        return pd.MultiIndex.from_frame(df)
-    return pd.Index(df.iloc[:, 0], name=table_meta.pd_index_level_names[0])
+        idx = pd.MultiIndex.from_frame(df)
+        idx.names = table_meta.pd_index_level_names
+    else:
+        # make sure even if None names are updated properly
+        idx = pd.Index(df.iloc[:, 0])
+        idx.name = table_meta.pd_index_level_names[0]
+    return idx
 def arrow_to_pandas(
@@ -75,7 +79,7 @@ def pandas_to_arrow(
         df.columns = pd.Index(table_meta.table_column_names)
         if not ignore_index:
             df = df.rename_axis(table_meta.table_index_column_names).reset_index()
-    elif ignore_index:
+    elif ignore_index and table_meta.type != OutputType.index:
         df = pd.DataFrame([], columns=[])
     elif table_meta.type == OutputType.index:
         names = [f"_idx_{idx}" for idx in range(len(df.names))]

maxframe/odpsio/schema.py CHANGED Viewed

@@ -126,10 +126,15 @@ def odps_type_to_arrow_type(
             ]
             col_type = pa.struct(fields)
         elif isinstance(odps_type, odps_types.Decimal):
-            col_type = pa.decimal128(
-                odps_type.precision or odps_types.Decimal._max_precision,
-                odps_type.scale or odps_types.Decimal._max_scale,
-            )
+            if odps_type.name == "decimal":
+                # legacy decimal data without precision or scale
+                # precision data from internal compat mode
+                col_type = pa.decimal128(38, 18)
+            else:
+                col_type = pa.decimal128(
+                    odps_type.precision or odps_types.Decimal._max_precision,
+                    odps_type.scale or odps_types.Decimal._max_scale,
+                )
         elif isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
             col_type = pa.string()
         else:
@@ -289,8 +294,6 @@ def build_dataframe_table_meta(
     else:  # pragma: no cover
         raise TypeError(f"Cannot accept type {type(df_obj)}")
-    assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
     if obj_type == OutputType.scalar:
         pd_dtypes = pd.Series([])
         column_index_names = []
@@ -346,7 +349,7 @@ def build_dataframe_table_meta(
     else:
         index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
-    if ignore_index:
+    if ignore_index and obj_type != OutputType.index:
         table_index_column_names = []
         pd_index_dtypes = pd.Series([], index=[])
     else: