maxframe 0.1.0b5__cp310-cp310-macosx_10_9_universal2.whl → 1.0.0rc2__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/codegen.py +6 -2
- maxframe/config/config.py +38 -2
- maxframe/config/validators.py +1 -0
- maxframe/conftest.py +2 -0
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +5 -55
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
- maxframe/dataframe/core.py +5 -5
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +6 -0
- maxframe/dataframe/datasource/read_odps_table.py +2 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/rename.py +3 -37
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/merge/merge.py +236 -2
- maxframe/dataframe/merge/tests/test_merge.py +123 -0
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +4 -25
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +23 -0
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +5 -17
- maxframe/dataframe/utils.py +4 -7
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
- maxframe/learn/contrib/xgboost/predict.py +2 -2
- maxframe/learn/contrib/xgboost/train.py +2 -2
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/odpsio/__init__.py +1 -1
- maxframe/odpsio/arrow.py +8 -4
- maxframe/odpsio/schema.py +10 -7
- maxframe/odpsio/tableio.py +388 -14
- maxframe/odpsio/tests/test_schema.py +16 -15
- maxframe/odpsio/tests/test_tableio.py +48 -21
- maxframe/protocol.py +148 -12
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +158 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/tensor/__init__.py +59 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/atleast_1d.py +1 -1
- maxframe/tensor/base/unique.py +3 -3
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +11 -2
- maxframe/utils.py +24 -13
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +75 -2
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +91 -89
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/fetcher.py +38 -27
- maxframe_client/session/odps.py +50 -10
- maxframe_client/session/task.py +41 -20
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +49 -2
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0
maxframe/dataframe/utils.py
CHANGED
|
@@ -26,7 +26,6 @@ import numpy as np
|
|
|
26
26
|
import pandas as pd
|
|
27
27
|
from pandas.api.extensions import ExtensionDtype
|
|
28
28
|
from pandas.api.types import is_string_dtype
|
|
29
|
-
from pandas.core.dtypes.cast import find_common_type
|
|
30
29
|
from pandas.core.dtypes.inference import is_dict_like, is_list_like
|
|
31
30
|
|
|
32
31
|
from ..core import Entity, ExecutableTuple
|
|
@@ -477,11 +476,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
|
|
|
477
476
|
else:
|
|
478
477
|
fill_values = fill_value
|
|
479
478
|
|
|
480
|
-
from .core import SERIES_TYPE
|
|
479
|
+
from .core import INDEX_TYPE, SERIES_TYPE
|
|
481
480
|
|
|
482
481
|
dtypes = (
|
|
483
482
|
pd.Series([df_obj.dtype], index=[df_obj.name])
|
|
484
|
-
if isinstance(df_obj, SERIES_TYPE)
|
|
483
|
+
if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
|
|
485
484
|
else df_obj.dtypes
|
|
486
485
|
)
|
|
487
486
|
for size, fill_value in zip(sizes, fill_values):
|
|
@@ -593,7 +592,7 @@ def build_series(
|
|
|
593
592
|
return ret_series
|
|
594
593
|
|
|
595
594
|
|
|
596
|
-
def infer_index_value(left_index_value, right_index_value):
|
|
595
|
+
def infer_index_value(left_index_value, right_index_value, level=None):
|
|
597
596
|
from .core import IndexValue
|
|
598
597
|
|
|
599
598
|
if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
|
|
@@ -616,9 +615,7 @@ def infer_index_value(left_index_value, right_index_value):
|
|
|
616
615
|
|
|
617
616
|
left_index = left_index_value.to_pandas()
|
|
618
617
|
right_index = right_index_value.to_pandas()
|
|
619
|
-
out_index = pd.Index(
|
|
620
|
-
[], dtype=find_common_type([left_index.dtype, right_index.dtype])
|
|
621
|
-
)
|
|
618
|
+
out_index = left_index.join(right_index, level=level)[:0]
|
|
622
619
|
return parse_index(out_index, left_index_value, right_index_value)
|
|
623
620
|
|
|
624
621
|
|
maxframe/errors.py
CHANGED
|
@@ -17,5 +17,18 @@ class MaxFrameError(Exception):
|
|
|
17
17
|
pass
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
class MaxFrameIntentionalError(MaxFrameError):
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
20
24
|
class MaxFrameUserError(MaxFrameError):
|
|
21
25
|
pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class NoTaskServerResponseError(MaxFrameError):
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SessionAlreadyClosedError(MaxFrameError):
|
|
33
|
+
def __init__(self, session_id: str):
|
|
34
|
+
super().__init__(f"Session {session_id} is already closed")
|
maxframe/extension.py
CHANGED
|
@@ -48,6 +48,18 @@ class MaxFrameExtension(metaclass=abc.ABCMeta):
|
|
|
48
48
|
"""
|
|
49
49
|
pass
|
|
50
50
|
|
|
51
|
+
@classmethod
|
|
52
|
+
async def reload_session(cls, session_id: str) -> None:
|
|
53
|
+
"""
|
|
54
|
+
Reload the session state when the session is recovered from failover.
|
|
55
|
+
|
|
56
|
+
Parameters
|
|
57
|
+
----------
|
|
58
|
+
session_id : str
|
|
59
|
+
The session id.
|
|
60
|
+
"""
|
|
61
|
+
pass
|
|
62
|
+
|
|
51
63
|
@classmethod
|
|
52
64
|
def init_service_extension(cls) -> None:
|
|
53
65
|
"""
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
from .... import opcodes
|
|
16
|
+
from .... import opcodes
|
|
17
17
|
from ....core.entity.output_types import get_output_types
|
|
18
18
|
from ....core.operator.base import Operator
|
|
19
19
|
from ....core.operator.core import TileableOperatorMixin
|
|
@@ -27,7 +27,7 @@ from ...utils import convert_to_tensor_or_dataframe
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class ToDMatrix(Operator, TileableOperatorMixin):
|
|
30
|
-
_op_type_ =
|
|
30
|
+
_op_type_ = opcodes.TO_DMATRIX
|
|
31
31
|
|
|
32
32
|
data = KeyField("data", default=None)
|
|
33
33
|
label = KeyField("label", default=None)
|
|
@@ -17,7 +17,7 @@ import pickle
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import pandas as pd
|
|
19
19
|
|
|
20
|
-
from .... import opcodes
|
|
20
|
+
from .... import opcodes
|
|
21
21
|
from ....core.entity.output_types import OutputType
|
|
22
22
|
from ....core.operator.base import Operator
|
|
23
23
|
from ....core.operator.core import TileableOperatorMixin
|
|
@@ -28,7 +28,7 @@ from .dmatrix import check_data
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class XGBPredict(Operator, TileableOperatorMixin):
|
|
31
|
-
_op_type_ =
|
|
31
|
+
_op_type_ = opcodes.XGBOOST_PREDICT
|
|
32
32
|
output_dtype = np.dtype(np.float32)
|
|
33
33
|
|
|
34
34
|
data = KeyField("data", default=None)
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import logging
|
|
16
16
|
from collections import OrderedDict
|
|
17
17
|
|
|
18
|
-
from .... import opcodes
|
|
18
|
+
from .... import opcodes
|
|
19
19
|
from ....core import OutputType
|
|
20
20
|
from ....core.operator.base import Operator
|
|
21
21
|
from ....core.operator.core import TileableOperatorMixin
|
|
@@ -41,7 +41,7 @@ def _on_serialize_evals(evals_val):
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class XGBTrain(Operator, TileableOperatorMixin):
|
|
44
|
-
_op_type_ =
|
|
44
|
+
_op_type_ = opcodes.XGBOOST_TRAIN
|
|
45
45
|
|
|
46
46
|
params = DictField("params", key_type=FieldTypes.string, default=None)
|
|
47
47
|
dtrain = KeyField("dtrain", default=None)
|
|
Binary file
|
maxframe/lib/mmh3.pyi
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
|
|
17
|
+
def hash(key, seed=0, signed=True) -> int:
|
|
18
|
+
"""
|
|
19
|
+
Return a 32 bit integer.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def hash_from_buffer(key, seed=0, signed=True) -> int:
|
|
23
|
+
"""
|
|
24
|
+
Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
|
|
28
|
+
"""
|
|
29
|
+
Return a tuple of two 64 bit integers for a string. Optimized for
|
|
30
|
+
the x64 bit architecture when x64arch=True, otherwise for x86.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def hash128(key, seed=0, x64arch=True, signed=False) -> int:
|
|
34
|
+
"""
|
|
35
|
+
Return a 128 bit long integer. Optimized for the x64 bit architecture
|
|
36
|
+
when x64arch=True, otherwise for x86.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def hash_bytes(key, seed=0, x64arch=True) -> bytes:
|
|
40
|
+
"""
|
|
41
|
+
Return a 128 bit hash value as bytes for a string. Optimized for the
|
|
42
|
+
x64 bit architecture when x64arch=True, otherwise for the x86.
|
|
43
|
+
"""
|
maxframe/lib/wrapped_pickle.py
CHANGED
|
@@ -120,7 +120,8 @@ class _UnpickleSwitch:
|
|
|
120
120
|
@functools.wraps(func)
|
|
121
121
|
async def wrapped(*args, **kwargs):
|
|
122
122
|
with _UnpickleSwitch(forbidden=self._forbidden):
|
|
123
|
-
|
|
123
|
+
ret = await func(*args, **kwargs)
|
|
124
|
+
return ret
|
|
124
125
|
|
|
125
126
|
else:
|
|
126
127
|
|
maxframe/odpsio/__init__.py
CHANGED
maxframe/odpsio/arrow.py
CHANGED
|
@@ -45,9 +45,13 @@ def _rebuild_dataframe(
|
|
|
45
45
|
|
|
46
46
|
def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index:
|
|
47
47
|
if df.shape[1] > 1:
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
idx = pd.MultiIndex.from_frame(df)
|
|
49
|
+
idx.names = table_meta.pd_index_level_names
|
|
50
|
+
else:
|
|
51
|
+
# make sure even if None names are updated properly
|
|
52
|
+
idx = pd.Index(df.iloc[:, 0])
|
|
53
|
+
idx.name = table_meta.pd_index_level_names[0]
|
|
54
|
+
return idx
|
|
51
55
|
|
|
52
56
|
|
|
53
57
|
def arrow_to_pandas(
|
|
@@ -75,7 +79,7 @@ def pandas_to_arrow(
|
|
|
75
79
|
df.columns = pd.Index(table_meta.table_column_names)
|
|
76
80
|
if not ignore_index:
|
|
77
81
|
df = df.rename_axis(table_meta.table_index_column_names).reset_index()
|
|
78
|
-
elif ignore_index:
|
|
82
|
+
elif ignore_index and table_meta.type != OutputType.index:
|
|
79
83
|
df = pd.DataFrame([], columns=[])
|
|
80
84
|
elif table_meta.type == OutputType.index:
|
|
81
85
|
names = [f"_idx_{idx}" for idx in range(len(df.names))]
|
maxframe/odpsio/schema.py
CHANGED
|
@@ -126,10 +126,15 @@ def odps_type_to_arrow_type(
|
|
|
126
126
|
]
|
|
127
127
|
col_type = pa.struct(fields)
|
|
128
128
|
elif isinstance(odps_type, odps_types.Decimal):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
129
|
+
if odps_type.name == "decimal":
|
|
130
|
+
# legacy decimal data without precision or scale
|
|
131
|
+
# precision data from internal compat mode
|
|
132
|
+
col_type = pa.decimal128(38, 18)
|
|
133
|
+
else:
|
|
134
|
+
col_type = pa.decimal128(
|
|
135
|
+
odps_type.precision or odps_types.Decimal._max_precision,
|
|
136
|
+
odps_type.scale or odps_types.Decimal._max_scale,
|
|
137
|
+
)
|
|
133
138
|
elif isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
|
|
134
139
|
col_type = pa.string()
|
|
135
140
|
else:
|
|
@@ -289,8 +294,6 @@ def build_dataframe_table_meta(
|
|
|
289
294
|
else: # pragma: no cover
|
|
290
295
|
raise TypeError(f"Cannot accept type {type(df_obj)}")
|
|
291
296
|
|
|
292
|
-
assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
|
|
293
|
-
|
|
294
297
|
if obj_type == OutputType.scalar:
|
|
295
298
|
pd_dtypes = pd.Series([])
|
|
296
299
|
column_index_names = []
|
|
@@ -346,7 +349,7 @@ def build_dataframe_table_meta(
|
|
|
346
349
|
else:
|
|
347
350
|
index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
|
|
348
351
|
|
|
349
|
-
if ignore_index:
|
|
352
|
+
if ignore_index and obj_type != OutputType.index:
|
|
350
353
|
table_index_column_names = []
|
|
351
354
|
pd_index_dtypes = pd.Series([], index=[])
|
|
352
355
|
else:
|