maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl → 1.0.0rc1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/codegen.py +10 -2
- maxframe/config/config.py +4 -0
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/misc/apply.py +2 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/tests/test_misc.py +23 -0
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/protocol.py +108 -10
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +119 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/atleast_1d.py +1 -1
- maxframe/tensor/base/unique.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +2 -2
- maxframe/utils.py +16 -13
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/METADATA +2 -2
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/RECORD +46 -44
- maxframe_client/__init__.py +0 -1
- maxframe_client/session/odps.py +45 -5
- maxframe_client/session/task.py +41 -20
- maxframe_client/tests/test_session.py +36 -0
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/top_level.txt +0 -0
|
Binary file
|
maxframe/codegen.py
CHANGED
|
@@ -86,6 +86,8 @@ class AbstractUDF(Serializable):
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
class UserCodeMixin:
|
|
89
|
+
__slots__ = ()
|
|
90
|
+
|
|
89
91
|
@classmethod
|
|
90
92
|
def obj_to_python_expr(cls, obj: Any = None) -> str:
|
|
91
93
|
"""
|
|
@@ -203,8 +205,12 @@ class BigDagCodeContext(metaclass=abc.ABCMeta):
|
|
|
203
205
|
return self._session_id
|
|
204
206
|
|
|
205
207
|
def register_udf(self, udf: AbstractUDF):
|
|
208
|
+
from maxframe_framedriver.services.session import SessionManager
|
|
209
|
+
|
|
206
210
|
udf.session_id = self._session_id
|
|
207
211
|
self._udfs[udf.name] = udf
|
|
212
|
+
if self._session_id and SessionManager.initialized():
|
|
213
|
+
SessionManager.instance().register_udf(self._session_id, udf)
|
|
208
214
|
|
|
209
215
|
def get_udfs(self) -> List[AbstractUDF]:
|
|
210
216
|
return list(self._udfs.values())
|
|
@@ -344,6 +350,8 @@ def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
|
|
|
344
350
|
BUILTIN_ENGINE_SPE = "SPE"
|
|
345
351
|
BUILTIN_ENGINE_MCSQL = "MCSQL"
|
|
346
352
|
|
|
353
|
+
FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
|
|
354
|
+
|
|
347
355
|
|
|
348
356
|
class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
349
357
|
_context: BigDagCodeContext
|
|
@@ -516,12 +524,12 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
|
516
524
|
|
|
517
525
|
def register_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
518
526
|
for udf in self._context.get_udfs():
|
|
519
|
-
logger.info("[Session
|
|
527
|
+
logger.info("[Session=%s] Registering UDF %s", self._session_id, udf.name)
|
|
520
528
|
udf.register(odps_ctx, True)
|
|
521
529
|
|
|
522
530
|
def unregister_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
523
531
|
for udf in self._context.get_udfs():
|
|
524
|
-
logger.info("[Session
|
|
532
|
+
logger.info("[Session=%s] Unregistering UDF %s", self._session_id, udf.name)
|
|
525
533
|
udf.unregister(odps_ctx)
|
|
526
534
|
|
|
527
535
|
def get_udfs(self) -> List[AbstractUDF]:
|
maxframe/config/config.py
CHANGED
|
@@ -40,6 +40,7 @@ _DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS = 120
|
|
|
40
40
|
_DEFAULT_UPLOAD_BATCH_SIZE = 4096
|
|
41
41
|
_DEFAULT_TEMP_LIFECYCLE = 1
|
|
42
42
|
_DEFAULT_TASK_START_TIMEOUT = 60
|
|
43
|
+
_DEFAULT_TASK_RESTART_TIMEOUT = 300
|
|
43
44
|
_DEFAULT_LOGVIEW_HOURS = 24 * 60
|
|
44
45
|
|
|
45
46
|
|
|
@@ -309,6 +310,9 @@ default_options.register_option(
|
|
|
309
310
|
default_options.register_option(
|
|
310
311
|
"client.task_start_timeout", _DEFAULT_TASK_START_TIMEOUT, validator=is_integer
|
|
311
312
|
)
|
|
313
|
+
default_options.register_option(
|
|
314
|
+
"client.task_restart_timeout", _DEFAULT_TASK_RESTART_TIMEOUT, validator=is_integer
|
|
315
|
+
)
|
|
312
316
|
default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remote=True)
|
|
313
317
|
default_options.register_option(
|
|
314
318
|
"sql.generate_comments", True, validator=is_bool, remote=True
|
maxframe/core/__init__.py
CHANGED
|
@@ -19,7 +19,6 @@ from .entity import (
|
|
|
19
19
|
CHUNK_TYPE,
|
|
20
20
|
ENTITY_TYPE,
|
|
21
21
|
FUSE_CHUNK_TYPE,
|
|
22
|
-
OBJECT_CHUNK_TYPE,
|
|
23
22
|
OBJECT_TYPE,
|
|
24
23
|
TILEABLE_TYPE,
|
|
25
24
|
Chunk,
|
|
@@ -33,8 +32,6 @@ from .entity import (
|
|
|
33
32
|
HasShapeTileableData,
|
|
34
33
|
NotSupportTile,
|
|
35
34
|
Object,
|
|
36
|
-
ObjectChunk,
|
|
37
|
-
ObjectChunkData,
|
|
38
35
|
ObjectData,
|
|
39
36
|
OutputType,
|
|
40
37
|
Tileable,
|
maxframe/core/entity/__init__.py
CHANGED
|
@@ -16,14 +16,7 @@ from .chunks import CHUNK_TYPE, Chunk, ChunkData
|
|
|
16
16
|
from .core import ENTITY_TYPE, Entity, EntityData
|
|
17
17
|
from .executable import ExecutableTuple, _ExecuteAndFetchMixin
|
|
18
18
|
from .fuse import FUSE_CHUNK_TYPE, FuseChunk, FuseChunkData
|
|
19
|
-
from .objects import
|
|
20
|
-
OBJECT_CHUNK_TYPE,
|
|
21
|
-
OBJECT_TYPE,
|
|
22
|
-
Object,
|
|
23
|
-
ObjectChunk,
|
|
24
|
-
ObjectChunkData,
|
|
25
|
-
ObjectData,
|
|
26
|
-
)
|
|
19
|
+
from .objects import OBJECT_TYPE, Object, ObjectData
|
|
27
20
|
from .output_types import (
|
|
28
21
|
OutputType,
|
|
29
22
|
get_fetch_class,
|
maxframe/core/entity/objects.py
CHANGED
|
@@ -14,58 +14,17 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Dict
|
|
16
16
|
|
|
17
|
-
from ...serialization.serializables import FieldTypes, ListField
|
|
18
|
-
from ...utils import skip_na_call
|
|
19
|
-
from .chunks import Chunk, ChunkData
|
|
20
17
|
from .core import Entity
|
|
21
18
|
from .executable import _ToObjectMixin
|
|
22
19
|
from .tileables import TileableData
|
|
23
20
|
|
|
24
21
|
|
|
25
|
-
class ObjectChunkData(ChunkData):
|
|
26
|
-
# chunk whose data could be any serializable
|
|
27
|
-
__slots__ = ()
|
|
28
|
-
type_name = "Object"
|
|
29
|
-
|
|
30
|
-
def __init__(self, op=None, index=None, **kw):
|
|
31
|
-
super().__init__(_op=op, _index=index, **kw)
|
|
32
|
-
|
|
33
|
-
@property
|
|
34
|
-
def params(self) -> Dict[str, Any]:
|
|
35
|
-
# params return the properties which useful to rebuild a new chunk
|
|
36
|
-
return {
|
|
37
|
-
"index": self.index,
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
@params.setter
|
|
41
|
-
def params(self, new_params: Dict[str, Any]):
|
|
42
|
-
params = new_params.copy()
|
|
43
|
-
params.pop("index", None) # index not needed to update
|
|
44
|
-
if params: # pragma: no cover
|
|
45
|
-
raise TypeError(f"Unknown params: {list(params)}")
|
|
46
|
-
|
|
47
|
-
@classmethod
|
|
48
|
-
def get_params_from_data(cls, data: Any) -> Dict[str, Any]:
|
|
49
|
-
return dict()
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class ObjectChunk(Chunk):
|
|
53
|
-
__slots__ = ()
|
|
54
|
-
_allow_data_type_ = (ObjectChunkData,)
|
|
55
|
-
type_name = "Object"
|
|
56
|
-
|
|
57
|
-
|
|
58
22
|
class ObjectData(TileableData, _ToObjectMixin):
|
|
59
23
|
__slots__ = ()
|
|
60
24
|
type_name = "Object"
|
|
61
|
-
|
|
62
|
-
#
|
|
63
|
-
|
|
64
|
-
"chunks",
|
|
65
|
-
FieldTypes.reference(ObjectChunkData),
|
|
66
|
-
on_serialize=skip_na_call(lambda x: [it.data for it in x]),
|
|
67
|
-
on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
|
|
68
|
-
)
|
|
25
|
+
# workaround for removed field since v0.1.0b5
|
|
26
|
+
# todo remove this when all versions below v0.1.0b5 are eliminated
|
|
27
|
+
_legacy_deprecated_non_primitives = ["_chunks"]
|
|
69
28
|
|
|
70
29
|
def __init__(self, op=None, nsplits=None, **kw):
|
|
71
30
|
super().__init__(_op=op, _nsplits=nsplits, **kw)
|
|
@@ -97,4 +56,3 @@ class Object(Entity, _ToObjectMixin):
|
|
|
97
56
|
|
|
98
57
|
|
|
99
58
|
OBJECT_TYPE = (Object, ObjectData)
|
|
100
|
-
OBJECT_CHUNK_TYPE = (ObjectChunk, ObjectChunkData)
|
|
Binary file
|
maxframe/core/graph/core.pyx
CHANGED
|
@@ -354,10 +354,10 @@ cdef class DirectedGraph:
|
|
|
354
354
|
sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" {chunk_style}\n')
|
|
355
355
|
visited.add(input_chunk.key)
|
|
356
356
|
if op.key not in visited:
|
|
357
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
|
|
357
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
|
|
358
358
|
visited.add(op.key)
|
|
359
359
|
sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" -> '
|
|
360
|
-
f'"{op_name}:{op.key[:trunc_key]}"\n')
|
|
360
|
+
f'"{op_name}:{op.key[:trunc_key]}_{id(op)}"\n')
|
|
361
361
|
|
|
362
362
|
for output_chunk in (op.outputs or []):
|
|
363
363
|
if output_chunk.key not in visited:
|
|
@@ -367,9 +367,9 @@ cdef class DirectedGraph:
|
|
|
367
367
|
sio.write(f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}" {tmp_chunk_style}\n')
|
|
368
368
|
visited.add(output_chunk.key)
|
|
369
369
|
if op.key not in visited:
|
|
370
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
|
|
370
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
|
|
371
371
|
visited.add(op.key)
|
|
372
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" -> '
|
|
372
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" -> '
|
|
373
373
|
f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}"')
|
|
374
374
|
if show_columns:
|
|
375
375
|
sio.write(f' [ label={get_col_names(output_chunk)} ]')
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
from ... import DataFrame
|
|
18
|
+
from ..to_odps import to_odps_table
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.fixture
|
|
22
|
+
def df():
|
|
23
|
+
return DataFrame({"A": [1, 2], "B": [3, 4]})
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.mark.parametrize(
|
|
27
|
+
"kwargs",
|
|
28
|
+
[
|
|
29
|
+
{"partition_col": ["A", "C"]},
|
|
30
|
+
{"partition_col": "C"},
|
|
31
|
+
{"partition": "a=1,C=2"},
|
|
32
|
+
],
|
|
33
|
+
)
|
|
34
|
+
def test_to_odps_table_validation(df, kwargs):
|
|
35
|
+
with pytest.raises(ValueError):
|
|
36
|
+
to_odps_table(df, "test_table", **kwargs)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.mark.parametrize(
|
|
40
|
+
"kwargs",
|
|
41
|
+
[
|
|
42
|
+
{"partition_col": ["a", "B"]},
|
|
43
|
+
{"partition_col": "a"},
|
|
44
|
+
{"partition": "C=1,d=2"},
|
|
45
|
+
],
|
|
46
|
+
)
|
|
47
|
+
def test_to_odps_table_vaild(df, kwargs):
|
|
48
|
+
to_odps_table(df, "test_table", **kwargs)
|
|
@@ -18,10 +18,12 @@ import logging
|
|
|
18
18
|
from typing import List, Optional, Union
|
|
19
19
|
|
|
20
20
|
from odps.models import Table as ODPSTable
|
|
21
|
+
from odps.types import PartitionSpec
|
|
21
22
|
|
|
22
23
|
from ... import opcodes
|
|
23
24
|
from ...config import options
|
|
24
25
|
from ...core import OutputType
|
|
26
|
+
from ...odpsio import build_dataframe_table_meta
|
|
25
27
|
from ...serialization.serializables import (
|
|
26
28
|
BoolField,
|
|
27
29
|
FieldTypes,
|
|
@@ -147,6 +149,25 @@ def to_odps_table(
|
|
|
147
149
|
f"index_label needs {len(df.index.nlevels)} labels "
|
|
148
150
|
f"but it only have {len(index_label)}"
|
|
149
151
|
)
|
|
152
|
+
table_cols = set(build_dataframe_table_meta(df).table_column_names)
|
|
153
|
+
if partition:
|
|
154
|
+
partition_intersect = (
|
|
155
|
+
set(x.lower() for x in PartitionSpec(partition).keys()) & table_cols
|
|
156
|
+
)
|
|
157
|
+
if partition_intersect:
|
|
158
|
+
raise ValueError(
|
|
159
|
+
f"Data column(s) {partition_intersect} in the dataframe"
|
|
160
|
+
" cannot be used in parameter 'partition'."
|
|
161
|
+
" Use 'partition_col' instead."
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
if partition_col:
|
|
165
|
+
partition_diff = set(x.lower() for x in partition_col) - table_cols
|
|
166
|
+
if partition_diff:
|
|
167
|
+
raise ValueError(
|
|
168
|
+
f"Partition column(s) {partition_diff}"
|
|
169
|
+
" is not the data column(s) of the input dataframe."
|
|
170
|
+
)
|
|
150
171
|
|
|
151
172
|
op = DataFrameToODPSTable(
|
|
152
173
|
dtypes=df.dtypes,
|
|
@@ -138,7 +138,7 @@ class DataFrameAlign(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
138
138
|
series_index = rhs.index_value.to_pandas()
|
|
139
139
|
dtypes = lhs.dtypes.reindex(
|
|
140
140
|
lhs.dtypes.index.join(series_index, how=self.join)
|
|
141
|
-
).fillna(np.dtype(
|
|
141
|
+
).fillna(np.dtype(float))
|
|
142
142
|
l_shape[1] = r_size = len(dtypes)
|
|
143
143
|
col_val = r_idx_val = parse_index(dtypes.index, store_data=True)
|
|
144
144
|
|
maxframe/dataframe/misc/apply.py
CHANGED
|
@@ -170,6 +170,8 @@ class ApplyOperator(
|
|
|
170
170
|
elif self.output_types[0] == OutputType.dataframe:
|
|
171
171
|
shape = [np.nan, np.nan]
|
|
172
172
|
shape[1 - self.axis] = df.shape[1 - self.axis]
|
|
173
|
+
if self.axis == 1:
|
|
174
|
+
shape[1] = len(dtypes)
|
|
173
175
|
shape = tuple(shape)
|
|
174
176
|
else:
|
|
175
177
|
shape = (df.shape[1 - self.axis],)
|
|
@@ -58,7 +58,7 @@ class DataFrameMemoryUsage(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
58
58
|
"""
|
|
59
59
|
if df_or_series.ndim == 1:
|
|
60
60
|
# the input data is a series, a Scalar will be returned
|
|
61
|
-
return self.new_scalar([df_or_series], dtype=np.dtype(
|
|
61
|
+
return self.new_scalar([df_or_series], dtype=np.dtype(int))
|
|
62
62
|
else:
|
|
63
63
|
# the input data is a DataFrame, a Scalar will be returned
|
|
64
64
|
# calculate shape of returning series given ``op.index``
|
|
@@ -71,7 +71,7 @@ class DataFrameMemoryUsage(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
71
71
|
[df_or_series],
|
|
72
72
|
index_value=self._adapt_index(df_or_series.columns_value),
|
|
73
73
|
shape=new_shape,
|
|
74
|
-
dtype=np.dtype(
|
|
74
|
+
dtype=np.dtype(int),
|
|
75
75
|
)
|
|
76
76
|
|
|
77
77
|
|
|
@@ -18,6 +18,7 @@ import pytest
|
|
|
18
18
|
|
|
19
19
|
from .... import opcodes
|
|
20
20
|
from ....core import OutputType
|
|
21
|
+
from ....dataframe import DataFrame
|
|
21
22
|
from ....tensor.core import TENSOR_TYPE
|
|
22
23
|
from ... import eval as maxframe_eval
|
|
23
24
|
from ... import get_dummies, to_numeric
|
|
@@ -430,6 +431,28 @@ def test_case_when():
|
|
|
430
431
|
assert isinstance(col.inputs[2].op, DataFrameGreater)
|
|
431
432
|
|
|
432
433
|
|
|
434
|
+
def test_apply():
|
|
435
|
+
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
|
|
436
|
+
|
|
437
|
+
keys = [1, 2]
|
|
438
|
+
|
|
439
|
+
def f(x, keys):
|
|
440
|
+
if x["a"] in keys:
|
|
441
|
+
return [1, 0]
|
|
442
|
+
else:
|
|
443
|
+
return [0, 1]
|
|
444
|
+
|
|
445
|
+
apply_df = df[["a"]].apply(
|
|
446
|
+
f,
|
|
447
|
+
output_type="dataframe",
|
|
448
|
+
dtypes=pd.Series(["int64", "int64"]),
|
|
449
|
+
axis=1,
|
|
450
|
+
result_type="expand",
|
|
451
|
+
keys=keys,
|
|
452
|
+
)
|
|
453
|
+
assert apply_df.shape == (3, 2)
|
|
454
|
+
|
|
455
|
+
|
|
433
456
|
def test_pivot_table():
|
|
434
457
|
from ...groupby.aggregation import DataFrameGroupByAgg
|
|
435
458
|
from ...misc.pivot_table import DataFramePivotTable
|
|
@@ -43,7 +43,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
43
43
|
def __call__(self, df_or_series):
|
|
44
44
|
if isinstance(df_or_series, SERIES_TYPE):
|
|
45
45
|
inputs = filter_inputs([df_or_series, self.other])
|
|
46
|
-
return self.new_scalar(inputs, dtype=np.dtype(
|
|
46
|
+
return self.new_scalar(inputs, dtype=np.dtype(float))
|
|
47
47
|
else:
|
|
48
48
|
|
|
49
49
|
def _filter_numeric(obj):
|
|
@@ -60,7 +60,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
60
60
|
inputs = filter_inputs([df_or_series, self.other])
|
|
61
61
|
if self.axis is None:
|
|
62
62
|
dtypes = pd.Series(
|
|
63
|
-
[np.dtype(
|
|
63
|
+
[np.dtype(float)] * len(df_or_series.dtypes),
|
|
64
64
|
index=df_or_series.dtypes.index,
|
|
65
65
|
)
|
|
66
66
|
return self.new_dataframe(
|
|
@@ -85,7 +85,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
85
85
|
return self.new_series(
|
|
86
86
|
inputs,
|
|
87
87
|
shape=shape,
|
|
88
|
-
dtype=np.dtype(
|
|
88
|
+
dtype=np.dtype(float),
|
|
89
89
|
index_value=new_index_value,
|
|
90
90
|
)
|
|
91
91
|
|
maxframe/errors.py
CHANGED
|
@@ -17,5 +17,18 @@ class MaxFrameError(Exception):
|
|
|
17
17
|
pass
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
class MaxFrameIntentionalError(MaxFrameError):
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
20
24
|
class MaxFrameUserError(MaxFrameError):
|
|
21
25
|
pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class NoTaskServerResponseError(MaxFrameError):
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SessionAlreadyClosedError(MaxFrameError):
|
|
33
|
+
def __init__(self, session_id: str):
|
|
34
|
+
super().__init__(f"Session {session_id} is already closed")
|
maxframe/extension.py
CHANGED
|
@@ -48,6 +48,18 @@ class MaxFrameExtension(metaclass=abc.ABCMeta):
|
|
|
48
48
|
"""
|
|
49
49
|
pass
|
|
50
50
|
|
|
51
|
+
@classmethod
|
|
52
|
+
async def reload_session(cls, session_id: str) -> None:
|
|
53
|
+
"""
|
|
54
|
+
Reload the session state when the session is recovered from failover.
|
|
55
|
+
|
|
56
|
+
Parameters
|
|
57
|
+
----------
|
|
58
|
+
session_id : str
|
|
59
|
+
The session id.
|
|
60
|
+
"""
|
|
61
|
+
pass
|
|
62
|
+
|
|
51
63
|
@classmethod
|
|
52
64
|
def init_service_extension(cls) -> None:
|
|
53
65
|
"""
|
|
Binary file
|
maxframe/lib/mmh3.pyi
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
|
|
17
|
+
def hash(key, seed=0, signed=True) -> int:
|
|
18
|
+
"""
|
|
19
|
+
Return a 32 bit integer.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def hash_from_buffer(key, seed=0, signed=True) -> int:
|
|
23
|
+
"""
|
|
24
|
+
Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
|
|
28
|
+
"""
|
|
29
|
+
Return a tuple of two 64 bit integers for a string. Optimized for
|
|
30
|
+
the x64 bit architecture when x64arch=True, otherwise for x86.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def hash128(key, seed=0, x64arch=True, signed=False) -> int:
|
|
34
|
+
"""
|
|
35
|
+
Return a 128 bit long integer. Optimized for the x64 bit architecture
|
|
36
|
+
when x64arch=True, otherwise for x86.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def hash_bytes(key, seed=0, x64arch=True) -> bytes:
|
|
40
|
+
"""
|
|
41
|
+
Return a 128 bit hash value as bytes for a string. Optimized for the
|
|
42
|
+
x64 bit architecture when x64arch=True, otherwise for the x86.
|
|
43
|
+
"""
|
maxframe/lib/wrapped_pickle.py
CHANGED
|
@@ -120,7 +120,8 @@ class _UnpickleSwitch:
|
|
|
120
120
|
@functools.wraps(func)
|
|
121
121
|
async def wrapped(*args, **kwargs):
|
|
122
122
|
with _UnpickleSwitch(forbidden=self._forbidden):
|
|
123
|
-
|
|
123
|
+
ret = await func(*args, **kwargs)
|
|
124
|
+
return ret
|
|
124
125
|
|
|
125
126
|
else:
|
|
126
127
|
|