maxframe 0.1.0b5__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ..objects import Object, ObjectData
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestSubObjectData(ObjectData):
|
|
19
|
+
__test__ = False
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TestSubObject(Object):
|
|
23
|
+
__test__ = False
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_object_init():
|
|
27
|
+
assert TestSubObjectData.get_entity_class() is TestSubObject
|
|
28
|
+
|
|
29
|
+
obj = ObjectData(
|
|
30
|
+
object_class=TestSubObjectData.__module__ + "#" + TestSubObjectData.__name__
|
|
31
|
+
)
|
|
32
|
+
assert isinstance(obj, TestSubObjectData)
|
|
33
|
+
|
|
34
|
+
obj = ObjectData(object_class=TestSubObjectData)
|
|
35
|
+
assert isinstance(obj, TestSubObjectData)
|
|
36
|
+
|
|
37
|
+
obj = ObjectData(
|
|
38
|
+
object_class=TestSubObject.__module__ + "#" + TestSubObject.__name__
|
|
39
|
+
)
|
|
40
|
+
assert isinstance(obj, TestSubObjectData)
|
|
41
|
+
|
|
42
|
+
obj = ObjectData(object_class=TestSubObject)
|
|
43
|
+
assert isinstance(obj, TestSubObjectData)
|
|
@@ -15,17 +15,15 @@
|
|
|
15
15
|
import builtins
|
|
16
16
|
import itertools
|
|
17
17
|
from operator import attrgetter
|
|
18
|
-
from typing import Callable, List
|
|
19
18
|
from weakref import WeakKeyDictionary, WeakSet
|
|
20
19
|
|
|
21
20
|
import numpy as np
|
|
22
21
|
|
|
23
22
|
from ...serialization.serializables import BoolField, FieldTypes, TupleField
|
|
24
|
-
from ...typing_ import
|
|
23
|
+
from ...typing_ import TileableType
|
|
25
24
|
from ...utils import on_deserialize_shape, on_serialize_nsplits, on_serialize_shape
|
|
26
25
|
from ..base import Base
|
|
27
26
|
from ..mode import enter_mode
|
|
28
|
-
from .chunks import Chunk
|
|
29
27
|
from .core import Entity, EntityData
|
|
30
28
|
from .executable import _ExecutableMixin
|
|
31
29
|
|
|
@@ -34,79 +32,6 @@ class NotSupportTile(Exception):
|
|
|
34
32
|
pass
|
|
35
33
|
|
|
36
34
|
|
|
37
|
-
class OperatorTilesHandler:
|
|
38
|
-
_handlers = dict()
|
|
39
|
-
|
|
40
|
-
@classmethod
|
|
41
|
-
def _get_op_cls(cls, op: OperatorType):
|
|
42
|
-
if isinstance(op, type):
|
|
43
|
-
return op
|
|
44
|
-
return type(op)
|
|
45
|
-
|
|
46
|
-
@classmethod
|
|
47
|
-
def register(
|
|
48
|
-
cls, op: OperatorType, tile_handler: Callable[[OperatorType], TileableType]
|
|
49
|
-
):
|
|
50
|
-
cls._handlers[cls._get_op_cls(op)] = tile_handler
|
|
51
|
-
|
|
52
|
-
@classmethod
|
|
53
|
-
def unregister(cls, op: OperatorType):
|
|
54
|
-
del cls._handlers[cls._get_op_cls(op)]
|
|
55
|
-
|
|
56
|
-
@classmethod
|
|
57
|
-
def get_handler(
|
|
58
|
-
cls, op: OperatorType
|
|
59
|
-
) -> Callable[[OperatorType], List[TileableType]]:
|
|
60
|
-
op_cls = cls._get_op_cls(op)
|
|
61
|
-
return cls._handlers.get(op_cls, op_cls.tile)
|
|
62
|
-
|
|
63
|
-
@classmethod
|
|
64
|
-
def _assign_to(
|
|
65
|
-
cls,
|
|
66
|
-
tile_after_tensor_datas: List["TileableData"],
|
|
67
|
-
tile_before_tensor_datas: List["TileableData"],
|
|
68
|
-
):
|
|
69
|
-
assert len(tile_after_tensor_datas) == len(tile_before_tensor_datas)
|
|
70
|
-
|
|
71
|
-
for tile_after_tensor_data, tile_before_tensor_data in zip(
|
|
72
|
-
tile_after_tensor_datas, tile_before_tensor_datas
|
|
73
|
-
):
|
|
74
|
-
if tile_before_tensor_data is None:
|
|
75
|
-
# garbage collected
|
|
76
|
-
continue
|
|
77
|
-
tile_after_tensor_data.copy_to(tile_before_tensor_data)
|
|
78
|
-
tile_before_tensor_data.op.outputs = tile_before_tensor_datas
|
|
79
|
-
|
|
80
|
-
@enter_mode(kernel=True)
|
|
81
|
-
def dispatch(self, op: OperatorType):
|
|
82
|
-
op_cls = self._get_op_cls(op)
|
|
83
|
-
tiled = None
|
|
84
|
-
cause = None
|
|
85
|
-
|
|
86
|
-
if op_cls in self._handlers:
|
|
87
|
-
tiled = self._handlers[op_cls](op)
|
|
88
|
-
else:
|
|
89
|
-
try:
|
|
90
|
-
tiled = op_cls.tile(op)
|
|
91
|
-
except NotImplementedError as ex:
|
|
92
|
-
cause = ex
|
|
93
|
-
for super_cls in op_cls.__mro__:
|
|
94
|
-
if super_cls in self._handlers:
|
|
95
|
-
h = self._handlers[op_cls] = self._handlers[super_cls]
|
|
96
|
-
tiled = h(op)
|
|
97
|
-
break
|
|
98
|
-
|
|
99
|
-
if tiled is not None:
|
|
100
|
-
return tiled if isinstance(tiled, list) else [tiled]
|
|
101
|
-
else:
|
|
102
|
-
raise NotImplementedError(f"{type(op)} does not support tile") from cause
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
handler = OperatorTilesHandler()
|
|
106
|
-
register = OperatorTilesHandler.register
|
|
107
|
-
unregister = OperatorTilesHandler.unregister
|
|
108
|
-
|
|
109
|
-
|
|
110
35
|
class _ChunksIndexer:
|
|
111
36
|
__slots__ = ("_tileable",)
|
|
112
37
|
|
|
@@ -231,7 +156,7 @@ entity_view_handler = EntityDataModificationHandler()
|
|
|
231
156
|
|
|
232
157
|
|
|
233
158
|
class TileableData(EntityData, _ExecutableMixin):
|
|
234
|
-
__slots__ = "_cix", "_entities", "_executed_sessions"
|
|
159
|
+
__slots__ = "_chunks", "_cix", "_entities", "_executed_sessions"
|
|
235
160
|
_no_copy_attrs_ = Base._no_copy_attrs_ | {"_cix"}
|
|
236
161
|
|
|
237
162
|
# optional fields
|
|
@@ -245,6 +170,8 @@ class TileableData(EntityData, _ExecutableMixin):
|
|
|
245
170
|
cache = BoolField("cache", default=False)
|
|
246
171
|
|
|
247
172
|
def __init__(self: TileableType, *args, **kwargs):
|
|
173
|
+
if kwargs.get("chunks") is not None:
|
|
174
|
+
self._chunks = kwargs.pop("chunks")
|
|
248
175
|
if kwargs.get("_nsplits", None) is not None:
|
|
249
176
|
kwargs["_nsplits"] = tuple(tuple(s) for s in kwargs["_nsplits"])
|
|
250
177
|
|
|
@@ -270,7 +197,7 @@ class TileableData(EntityData, _ExecutableMixin):
|
|
|
270
197
|
return tuple(map(len, self._nsplits))
|
|
271
198
|
|
|
272
199
|
@property
|
|
273
|
-
def chunks(self) ->
|
|
200
|
+
def chunks(self) -> list:
|
|
274
201
|
return getattr(self, "_chunks", None)
|
|
275
202
|
|
|
276
203
|
@property
|
maxframe/core/graph/__init__.py
CHANGED
|
@@ -12,6 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .builder import
|
|
15
|
+
from .builder import TileableGraphBuilder
|
|
16
16
|
from .core import DAG, DirectedGraph, GraphContainsCycleError
|
|
17
|
-
from .entity import
|
|
17
|
+
from .entity import EntityGraph, GraphSerializer, TileableGraph
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
|
-
from typing import Generator, List, Set
|
|
17
|
+
from typing import Generator, List, Set
|
|
18
18
|
|
|
19
19
|
from ....typing_ import EntityType
|
|
20
|
-
from ..entity import
|
|
20
|
+
from ..entity import EntityGraph
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def _default_inputs_selector(inputs: List[EntityType]) -> List[EntityType]:
|
|
@@ -43,7 +43,7 @@ class AbstractGraphBuilder(ABC):
|
|
|
43
43
|
|
|
44
44
|
def _add_nodes(
|
|
45
45
|
self,
|
|
46
|
-
graph:
|
|
46
|
+
graph: EntityGraph,
|
|
47
47
|
nodes: List[EntityType],
|
|
48
48
|
visited: Set,
|
|
49
49
|
):
|
|
@@ -75,7 +75,7 @@ class AbstractGraphBuilder(ABC):
|
|
|
75
75
|
nodes.append(out)
|
|
76
76
|
|
|
77
77
|
@abstractmethod
|
|
78
|
-
def build(self) -> Generator[
|
|
78
|
+
def build(self) -> Generator[EntityGraph, None, None]:
|
|
79
79
|
"""
|
|
80
80
|
Build a entity graph.
|
|
81
81
|
|
|
@@ -84,3 +84,4 @@ class AbstractGraphBuilder(ABC):
|
|
|
84
84
|
graph : EntityGraph
|
|
85
85
|
Entity graph.
|
|
86
86
|
"""
|
|
87
|
+
raise NotImplementedError
|
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Generator
|
|
15
|
+
from typing import Generator
|
|
16
16
|
|
|
17
17
|
from ...mode import enter_mode
|
|
18
|
-
from ..entity import
|
|
18
|
+
from ..entity import TileableGraph
|
|
19
19
|
from .base import AbstractGraphBuilder
|
|
20
20
|
|
|
21
21
|
|
|
@@ -26,9 +26,9 @@ class TileableGraphBuilder(AbstractGraphBuilder):
|
|
|
26
26
|
super().__init__(graph=graph)
|
|
27
27
|
|
|
28
28
|
@enter_mode(build=True, kernel=True)
|
|
29
|
-
def _build(self) ->
|
|
29
|
+
def _build(self) -> TileableGraph:
|
|
30
30
|
self._add_nodes(self._graph, list(self._graph.result_tileables), set())
|
|
31
31
|
return self._graph
|
|
32
32
|
|
|
33
|
-
def build(self) -> Generator[
|
|
33
|
+
def build(self) -> Generator[TileableGraph, None, None]:
|
|
34
34
|
yield self._build()
|
|
@@ -13,12 +13,11 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import itertools
|
|
16
|
-
from typing import List
|
|
16
|
+
from typing import List
|
|
17
17
|
|
|
18
18
|
from ....typing_ import TileableType
|
|
19
19
|
from ...mode import enter_mode
|
|
20
|
-
from ..entity import
|
|
21
|
-
from .chunk import ChunkGraphBuilder
|
|
20
|
+
from ..entity import EntityGraph, TileableGraph
|
|
22
21
|
from .tileable import TileableGraphBuilder
|
|
23
22
|
|
|
24
23
|
|
|
@@ -28,14 +27,11 @@ def build_graph(
|
|
|
28
27
|
tile: bool = False,
|
|
29
28
|
fuse_enabled: bool = True,
|
|
30
29
|
**chunk_graph_build_kwargs
|
|
31
|
-
) ->
|
|
30
|
+
) -> EntityGraph:
|
|
32
31
|
tileables = list(itertools.chain(*(tileable.op.outputs for tileable in tileables)))
|
|
33
32
|
tileable_graph = TileableGraph(tileables)
|
|
34
33
|
tileable_graph_builder = TileableGraphBuilder(tileable_graph)
|
|
35
34
|
tileable_graph = next(tileable_graph_builder.build())
|
|
36
35
|
if not tile:
|
|
37
36
|
return tileable_graph
|
|
38
|
-
|
|
39
|
-
tileable_graph, fuse_enabled=fuse_enabled, **chunk_graph_build_kwargs
|
|
40
|
-
)
|
|
41
|
-
return next(chunk_graph_builder.build())
|
|
37
|
+
raise NotImplementedError
|
|
Binary file
|
maxframe/core/graph/core.pyx
CHANGED
|
@@ -354,10 +354,10 @@ cdef class DirectedGraph:
|
|
|
354
354
|
sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" {chunk_style}\n')
|
|
355
355
|
visited.add(input_chunk.key)
|
|
356
356
|
if op.key not in visited:
|
|
357
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
|
|
357
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
|
|
358
358
|
visited.add(op.key)
|
|
359
359
|
sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" -> '
|
|
360
|
-
f'"{op_name}:{op.key[:trunc_key]}"\n')
|
|
360
|
+
f'"{op_name}:{op.key[:trunc_key]}_{id(op)}"\n')
|
|
361
361
|
|
|
362
362
|
for output_chunk in (op.outputs or []):
|
|
363
363
|
if output_chunk.key not in visited:
|
|
@@ -367,9 +367,9 @@ cdef class DirectedGraph:
|
|
|
367
367
|
sio.write(f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}" {tmp_chunk_style}\n')
|
|
368
368
|
visited.add(output_chunk.key)
|
|
369
369
|
if op.key not in visited:
|
|
370
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
|
|
370
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
|
|
371
371
|
visited.add(op.key)
|
|
372
|
-
sio.write(f'"{op_name}:{op.key[:trunc_key]}" -> '
|
|
372
|
+
sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" -> '
|
|
373
373
|
f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}"')
|
|
374
374
|
if show_columns:
|
|
375
375
|
sio.write(f' [ label={get_col_names(output_chunk)} ]')
|
maxframe/core/graph/entity.py
CHANGED
|
@@ -13,9 +13,9 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABCMeta, abstractmethod
|
|
16
|
-
from typing import Dict, Iterable, List
|
|
16
|
+
from typing import Dict, Iterable, List
|
|
17
17
|
|
|
18
|
-
from ...core import
|
|
18
|
+
from ...core import Tileable
|
|
19
19
|
from ...serialization.core import buffered
|
|
20
20
|
from ...serialization.serializables import BoolField, DictField, ListField, Serializable
|
|
21
21
|
from ...serialization.serializables.core import SerializableSerializer
|
|
@@ -97,26 +97,6 @@ class TileableGraph(EntityGraph, Iterable[Tileable]):
|
|
|
97
97
|
return self._logic_key
|
|
98
98
|
|
|
99
99
|
|
|
100
|
-
class ChunkGraph(EntityGraph, Iterable[Chunk]):
|
|
101
|
-
_result_chunks: List[Chunk]
|
|
102
|
-
|
|
103
|
-
def __init__(self, result_chunks: List[Chunk] = None):
|
|
104
|
-
super().__init__()
|
|
105
|
-
self._result_chunks = result_chunks
|
|
106
|
-
|
|
107
|
-
@property
|
|
108
|
-
def result_chunks(self):
|
|
109
|
-
return self._result_chunks
|
|
110
|
-
|
|
111
|
-
@property
|
|
112
|
-
def results(self):
|
|
113
|
-
return self._result_chunks
|
|
114
|
-
|
|
115
|
-
@results.setter
|
|
116
|
-
def results(self, new_results):
|
|
117
|
-
self._result_chunks = new_results
|
|
118
|
-
|
|
119
|
-
|
|
120
100
|
class SerializableGraph(Serializable):
|
|
121
101
|
_is_chunk = BoolField("is_chunk")
|
|
122
102
|
# TODO(qinxuye): remove this logic when we handle fetch elegantly,
|
|
@@ -132,12 +112,11 @@ class SerializableGraph(Serializable):
|
|
|
132
112
|
_results = ListField("results")
|
|
133
113
|
|
|
134
114
|
@classmethod
|
|
135
|
-
def from_graph(cls, graph:
|
|
115
|
+
def from_graph(cls, graph: EntityGraph) -> "SerializableGraph":
|
|
136
116
|
from ..operator import Fetch
|
|
137
117
|
|
|
138
|
-
is_chunk = isinstance(graph, ChunkGraph)
|
|
139
118
|
return SerializableGraph(
|
|
140
|
-
_is_chunk=
|
|
119
|
+
_is_chunk=False,
|
|
141
120
|
_fetch_nodes=[chunk for chunk in graph if isinstance(chunk.op, Fetch)],
|
|
142
121
|
_nodes=graph._nodes,
|
|
143
122
|
_predecessors=graph._predecessors,
|
|
@@ -145,9 +124,8 @@ class SerializableGraph(Serializable):
|
|
|
145
124
|
_results=graph.results,
|
|
146
125
|
)
|
|
147
126
|
|
|
148
|
-
def to_graph(self) ->
|
|
149
|
-
|
|
150
|
-
graph = graph_cls(self._results)
|
|
127
|
+
def to_graph(self) -> EntityGraph:
|
|
128
|
+
graph = TileableGraph(self._results)
|
|
151
129
|
graph._nodes.update(self._nodes)
|
|
152
130
|
graph._predecessors.update(self._predecessors)
|
|
153
131
|
graph._successors.update(self._successors)
|
|
@@ -156,14 +134,12 @@ class SerializableGraph(Serializable):
|
|
|
156
134
|
|
|
157
135
|
class GraphSerializer(SerializableSerializer):
|
|
158
136
|
@buffered
|
|
159
|
-
def serial(self, obj:
|
|
137
|
+
def serial(self, obj: EntityGraph, context: Dict):
|
|
160
138
|
serializable_graph = SerializableGraph.from_graph(obj)
|
|
161
139
|
return [], [serializable_graph], False
|
|
162
140
|
|
|
163
|
-
def deserial(
|
|
164
|
-
|
|
165
|
-
) -> Union[TileableGraph, ChunkGraph]:
|
|
166
|
-
serializable_graph: SerializableGraph = subs[0]
|
|
141
|
+
def deserial(self, serialized: List, context: Dict, subs: List) -> TileableGraph:
|
|
142
|
+
serializable_graph: EntityGraph = subs[0]
|
|
167
143
|
return serializable_graph.to_graph()
|
|
168
144
|
|
|
169
145
|
|
|
@@ -22,13 +22,6 @@ from .base import (
|
|
|
22
22
|
)
|
|
23
23
|
from .core import TileableOperatorMixin, estimate_size, execute
|
|
24
24
|
from .fetch import Fetch, FetchMixin, FetchShuffle, ShuffleFetchType
|
|
25
|
-
from .
|
|
26
|
-
from .objects import (
|
|
27
|
-
MergeDictOperator,
|
|
28
|
-
ObjectFetch,
|
|
29
|
-
ObjectFuseChunk,
|
|
30
|
-
ObjectFuseChunkMixin,
|
|
31
|
-
ObjectOperator,
|
|
32
|
-
ObjectOperatorMixin,
|
|
33
|
-
)
|
|
25
|
+
from .objects import MergeDictOperator, ObjectFetch, ObjectOperator, ObjectOperatorMixin
|
|
34
26
|
from .shuffle import MapReduceOperator, ShuffleProxy
|
|
27
|
+
from .utils import add_fetch_builder, build_fetch
|
maxframe/core/operator/base.py
CHANGED
|
@@ -12,11 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import functools
|
|
16
15
|
import weakref
|
|
17
16
|
from copy import deepcopy
|
|
18
17
|
from enum import Enum
|
|
19
|
-
from functools import partial
|
|
18
|
+
from functools import lru_cache, partial
|
|
20
19
|
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
21
20
|
|
|
22
21
|
from ...serialization.core import Placeholder
|
|
@@ -37,7 +36,6 @@ from ...serialization.serializables.core import SerializableSerializer
|
|
|
37
36
|
from ...typing_ import OperatorType
|
|
38
37
|
from ...utils import AttributeDict, classproperty, get_user_call_point, tokenize
|
|
39
38
|
from ..base import Base
|
|
40
|
-
from ..entity.chunks import Chunk
|
|
41
39
|
from ..entity.core import ENTITY_TYPE, Entity, EntityData
|
|
42
40
|
from ..entity.output_types import OutputType
|
|
43
41
|
from ..entity.tileables import Tileable
|
|
@@ -90,7 +88,7 @@ class SchedulingHint(Serializable):
|
|
|
90
88
|
priority = Int32Field("priority", default=None)
|
|
91
89
|
|
|
92
90
|
@classproperty
|
|
93
|
-
@
|
|
91
|
+
@lru_cache(1)
|
|
94
92
|
def all_hint_names(cls):
|
|
95
93
|
return list(cls._FIELDS)
|
|
96
94
|
|
|
@@ -341,7 +339,7 @@ class Operator(Base, OperatorLogicKeyGeneratorMixin, metaclass=OperatorMetaclass
|
|
|
341
339
|
raise ValueError("Outputs' size exceeds limitation")
|
|
342
340
|
|
|
343
341
|
@property
|
|
344
|
-
def outputs(self) -> List[
|
|
342
|
+
def outputs(self) -> List[Tileable]:
|
|
345
343
|
outputs = self._outputs
|
|
346
344
|
if outputs:
|
|
347
345
|
return [ref() for ref in outputs]
|
|
@@ -17,7 +17,6 @@ from ..entity import OutputType, register_fetch_class
|
|
|
17
17
|
from .base import Operator
|
|
18
18
|
from .core import TileableOperatorMixin
|
|
19
19
|
from .fetch import Fetch, FetchMixin
|
|
20
|
-
from .fuse import Fuse, FuseChunkMixin
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class ObjectOperator(Operator):
|
|
@@ -28,14 +27,6 @@ class ObjectOperatorMixin(TileableOperatorMixin):
|
|
|
28
27
|
_output_type_ = OutputType.object
|
|
29
28
|
|
|
30
29
|
|
|
31
|
-
class ObjectFuseChunkMixin(FuseChunkMixin, ObjectOperatorMixin):
|
|
32
|
-
__slots__ = ()
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class ObjectFuseChunk(ObjectFuseChunkMixin, Fuse):
|
|
36
|
-
pass
|
|
37
|
-
|
|
38
|
-
|
|
39
30
|
class ObjectFetch(FetchMixin, ObjectOperatorMixin, Fetch):
|
|
40
31
|
_output_type_ = OutputType.object
|
|
41
32
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...typing_ import EntityType, TileableType
|
|
16
|
+
from ..entity import TILEABLE_TYPE
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def build_fetch_tileable(tileable: TileableType) -> TileableType:
|
|
20
|
+
if tileable.is_coarse():
|
|
21
|
+
chunks = None
|
|
22
|
+
else:
|
|
23
|
+
chunks = []
|
|
24
|
+
for c in tileable.chunks:
|
|
25
|
+
fetch_chunk = build_fetch(c, index=c.index)
|
|
26
|
+
chunks.append(fetch_chunk)
|
|
27
|
+
|
|
28
|
+
tileable_op = tileable.op
|
|
29
|
+
params = tileable.params.copy()
|
|
30
|
+
|
|
31
|
+
new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
|
|
32
|
+
return new_op.new_tileables(
|
|
33
|
+
None,
|
|
34
|
+
chunks=chunks,
|
|
35
|
+
nsplits=tileable.nsplits,
|
|
36
|
+
_key=tileable.key,
|
|
37
|
+
_id=tileable.id,
|
|
38
|
+
**params,
|
|
39
|
+
)[0]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
_type_to_builder = [
|
|
43
|
+
(TILEABLE_TYPE, build_fetch_tileable),
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def build_fetch(entity: EntityType, **kw) -> EntityType:
|
|
48
|
+
for entity_types, func in _type_to_builder:
|
|
49
|
+
if isinstance(entity, entity_types):
|
|
50
|
+
return func(entity, **kw)
|
|
51
|
+
raise TypeError(f"Type {type(entity)} not supported")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def add_fetch_builder(entity_type, builder_func):
|
|
55
|
+
_type_to_builder.append((entity_type, builder_func))
|
maxframe/dataframe/__init__.py
CHANGED
|
@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
|
|
|
43
43
|
return op(df)
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
# FIXME Series input of decimals not supported yet
|
|
46
47
|
around.__frame_doc__ = """
|
|
47
48
|
Round a DataFrame to a variable number of decimal places.
|
|
48
49
|
|
|
49
50
|
Parameters
|
|
50
51
|
----------
|
|
51
|
-
decimals : int, dict
|
|
52
|
+
decimals : int, dict
|
|
52
53
|
Number of decimal places to round each column to. If an int is
|
|
53
54
|
given, round each column to the same number of places.
|
|
54
55
|
Otherwise dict and Series round to variable numbers of places.
|
|
55
56
|
Column names should be in the keys if `decimals` is a
|
|
56
|
-
dict-like
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
ignored.
|
|
57
|
+
dict-like. Any columns not included in `decimals` will be left
|
|
58
|
+
as is. Elements of `decimals` which are not columns of the
|
|
59
|
+
input will be ignored.
|
|
60
60
|
*args
|
|
61
61
|
Additional keywords have no effect but might be accepted for
|
|
62
62
|
compatibility with numpy.
|
|
@@ -107,18 +107,6 @@ places as value
|
|
|
107
107
|
1 0.0 1.0
|
|
108
108
|
2 0.7 0.0
|
|
109
109
|
3 0.2 0.0
|
|
110
|
-
|
|
111
|
-
Using a Series, the number of places for specific columns can be
|
|
112
|
-
specified with the column names as index and the number of
|
|
113
|
-
decimal places as value
|
|
114
|
-
|
|
115
|
-
>>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
|
|
116
|
-
>>> df.round(decimals).execute()
|
|
117
|
-
dogs cats
|
|
118
|
-
0 0.2 0.0
|
|
119
|
-
1 0.0 1.0
|
|
120
|
-
2 0.7 0.0
|
|
121
|
-
3 0.2 0.0
|
|
122
110
|
"""
|
|
123
111
|
around.__series_doc__ = """
|
|
124
112
|
Round each value in a Series to the given number of decimals.
|
|
@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
39
39
|
raise NotImplementedError
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
|
-
def _calc_properties(cls, x1, x2=None, axis="columns"):
|
|
42
|
+
def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
|
|
43
43
|
if isinstance(x1, DATAFRAME_TYPE) and (
|
|
44
44
|
x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
|
|
45
45
|
):
|
|
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
108
108
|
index = copy.copy(x1.index_value)
|
|
109
109
|
index_shape = x1.shape[0]
|
|
110
110
|
else:
|
|
111
|
-
index = infer_index_value(
|
|
111
|
+
index = infer_index_value(
|
|
112
|
+
x1.index_value, x2.index_value, level=level
|
|
113
|
+
)
|
|
112
114
|
if index.key == x1.index_value.key == x2.index_value.key and (
|
|
113
115
|
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
|
|
114
116
|
):
|
|
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
141
143
|
column_shape = len(dtypes)
|
|
142
144
|
else: # pragma: no cover
|
|
143
145
|
dtypes = x1.dtypes # FIXME
|
|
144
|
-
columns = infer_index_value(
|
|
146
|
+
columns = infer_index_value(
|
|
147
|
+
x1.columns_value, x2.index_value, level=level
|
|
148
|
+
)
|
|
145
149
|
column_shape = np.nan
|
|
146
150
|
else:
|
|
147
151
|
assert axis == "index" or axis == 0
|
|
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
169
173
|
],
|
|
170
174
|
index=x1.dtypes.index,
|
|
171
175
|
)
|
|
172
|
-
index = infer_index_value(
|
|
176
|
+
index = infer_index_value(
|
|
177
|
+
x1.index_value, x2.index_value, level=level
|
|
178
|
+
)
|
|
173
179
|
index_shape = np.nan
|
|
174
180
|
return {
|
|
175
181
|
"shape": (index_shape, column_shape),
|
|
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
187
193
|
index = copy.copy(x1.index_value)
|
|
188
194
|
index_shape = x1.shape[0]
|
|
189
195
|
else:
|
|
190
|
-
index = infer_index_value(
|
|
196
|
+
index = infer_index_value(
|
|
197
|
+
x1.index_value, x2.index_value, level=level
|
|
198
|
+
)
|
|
191
199
|
if index.key == x1.index_value.key == x2.index_value.key and (
|
|
192
200
|
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
|
|
193
201
|
):
|
|
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
237
245
|
self._check_inputs(x1, x2)
|
|
238
246
|
if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
|
|
239
247
|
df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
|
|
240
|
-
kw = self._calc_properties(df1, df2, axis=self.axis)
|
|
248
|
+
kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
|
|
241
249
|
if not pd.api.types.is_scalar(df2):
|
|
242
250
|
return self.new_dataframe([x1, x2], **kw)
|
|
243
251
|
else:
|
|
244
252
|
return self.new_dataframe([df1], **kw)
|
|
245
253
|
if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
|
|
246
254
|
s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
|
|
247
|
-
kw = self._calc_properties(s1, s2)
|
|
255
|
+
kw = self._calc_properties(s1, s2, level=self.level)
|
|
248
256
|
if not pd.api.types.is_scalar(s2):
|
|
249
257
|
return self.new_series([x1, x2], **kw)
|
|
250
258
|
else:
|