maxframe 0.1.0b4__cp37-cp37m-win_amd64.whl → 1.0.0__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (214) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  3. maxframe/codegen.py +56 -5
  4. maxframe/config/config.py +78 -10
  5. maxframe/config/validators.py +42 -11
  6. maxframe/conftest.py +58 -14
  7. maxframe/core/__init__.py +2 -16
  8. maxframe/core/entity/__init__.py +1 -12
  9. maxframe/core/entity/executable.py +1 -1
  10. maxframe/core/entity/objects.py +46 -45
  11. maxframe/core/entity/output_types.py +0 -3
  12. maxframe/core/entity/tests/test_objects.py +43 -0
  13. maxframe/core/entity/tileables.py +5 -78
  14. maxframe/core/graph/__init__.py +2 -2
  15. maxframe/core/graph/builder/__init__.py +0 -1
  16. maxframe/core/graph/builder/base.py +5 -4
  17. maxframe/core/graph/builder/tileable.py +4 -4
  18. maxframe/core/graph/builder/utils.py +4 -8
  19. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  20. maxframe/core/graph/core.pyx +4 -4
  21. maxframe/core/graph/entity.py +9 -33
  22. maxframe/core/operator/__init__.py +2 -9
  23. maxframe/core/operator/base.py +3 -5
  24. maxframe/core/operator/objects.py +0 -9
  25. maxframe/core/operator/utils.py +55 -0
  26. maxframe/dataframe/__init__.py +2 -1
  27. maxframe/dataframe/arithmetic/around.py +5 -17
  28. maxframe/dataframe/arithmetic/core.py +15 -7
  29. maxframe/dataframe/arithmetic/docstring.py +7 -33
  30. maxframe/dataframe/arithmetic/equal.py +4 -2
  31. maxframe/dataframe/arithmetic/greater.py +4 -2
  32. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  33. maxframe/dataframe/arithmetic/less.py +2 -2
  34. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  36. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  37. maxframe/dataframe/core.py +58 -12
  38. maxframe/dataframe/datasource/date_range.py +2 -2
  39. maxframe/dataframe/datasource/read_odps_query.py +120 -24
  40. maxframe/dataframe/datasource/read_odps_table.py +9 -4
  41. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  42. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  43. maxframe/dataframe/datastore/to_odps.py +28 -0
  44. maxframe/dataframe/extensions/__init__.py +5 -0
  45. maxframe/dataframe/extensions/flatjson.py +131 -0
  46. maxframe/dataframe/extensions/flatmap.py +317 -0
  47. maxframe/dataframe/extensions/reshuffle.py +1 -1
  48. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  49. maxframe/dataframe/groupby/core.py +1 -1
  50. maxframe/dataframe/groupby/cum.py +0 -1
  51. maxframe/dataframe/groupby/fill.py +4 -1
  52. maxframe/dataframe/groupby/getitem.py +6 -0
  53. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  54. maxframe/dataframe/groupby/transform.py +5 -1
  55. maxframe/dataframe/indexing/align.py +1 -1
  56. maxframe/dataframe/indexing/loc.py +6 -4
  57. maxframe/dataframe/indexing/rename.py +5 -28
  58. maxframe/dataframe/indexing/sample.py +0 -1
  59. maxframe/dataframe/indexing/set_index.py +68 -1
  60. maxframe/dataframe/initializer.py +11 -1
  61. maxframe/dataframe/merge/__init__.py +9 -1
  62. maxframe/dataframe/merge/concat.py +41 -31
  63. maxframe/dataframe/merge/merge.py +237 -3
  64. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  65. maxframe/dataframe/misc/__init__.py +4 -0
  66. maxframe/dataframe/misc/apply.py +6 -11
  67. maxframe/dataframe/misc/case_when.py +141 -0
  68. maxframe/dataframe/misc/describe.py +2 -2
  69. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  70. maxframe/dataframe/misc/eval.py +4 -0
  71. maxframe/dataframe/misc/memory_usage.py +2 -2
  72. maxframe/dataframe/misc/pct_change.py +1 -83
  73. maxframe/dataframe/misc/pivot_table.py +262 -0
  74. maxframe/dataframe/misc/tests/test_misc.py +93 -1
  75. maxframe/dataframe/misc/transform.py +1 -30
  76. maxframe/dataframe/misc/value_counts.py +4 -17
  77. maxframe/dataframe/missing/dropna.py +1 -1
  78. maxframe/dataframe/missing/fillna.py +5 -5
  79. maxframe/dataframe/operators.py +1 -17
  80. maxframe/dataframe/plotting/core.py +2 -2
  81. maxframe/dataframe/reduction/core.py +4 -3
  82. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  83. maxframe/dataframe/sort/sort_values.py +1 -11
  84. maxframe/dataframe/statistics/corr.py +3 -3
  85. maxframe/dataframe/statistics/quantile.py +13 -19
  86. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  87. maxframe/dataframe/tests/test_initializer.py +33 -2
  88. maxframe/dataframe/utils.py +33 -11
  89. maxframe/dataframe/window/expanding.py +5 -3
  90. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  91. maxframe/errors.py +13 -0
  92. maxframe/extension.py +12 -0
  93. maxframe/io/__init__.py +13 -0
  94. maxframe/io/objects/__init__.py +24 -0
  95. maxframe/io/objects/core.py +140 -0
  96. maxframe/io/objects/tensor.py +76 -0
  97. maxframe/io/objects/tests/__init__.py +13 -0
  98. maxframe/io/objects/tests/test_object_io.py +97 -0
  99. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  100. maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
  101. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  102. maxframe/io/odpsio/tableio.py +719 -0
  103. maxframe/io/odpsio/tests/__init__.py +13 -0
  104. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
  105. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  106. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  107. maxframe/io/odpsio/volumeio.py +63 -0
  108. maxframe/learn/contrib/__init__.py +3 -1
  109. maxframe/learn/contrib/graph/__init__.py +15 -0
  110. maxframe/learn/contrib/graph/connected_components.py +215 -0
  111. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  112. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  113. maxframe/learn/contrib/llm/__init__.py +16 -0
  114. maxframe/learn/contrib/llm/core.py +54 -0
  115. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  116. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  117. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  118. maxframe/learn/contrib/llm/text.py +42 -0
  119. maxframe/learn/contrib/utils.py +52 -0
  120. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  121. maxframe/learn/contrib/xgboost/classifier.py +110 -0
  122. maxframe/learn/contrib/xgboost/core.py +241 -0
  123. maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
  124. maxframe/learn/contrib/xgboost/predict.py +121 -0
  125. maxframe/learn/contrib/xgboost/regressor.py +71 -0
  126. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  127. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  128. maxframe/learn/contrib/xgboost/train.py +132 -0
  129. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  130. maxframe/learn/utils/__init__.py +15 -0
  131. maxframe/learn/utils/core.py +29 -0
  132. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  133. maxframe/lib/mmh3.pyi +43 -0
  134. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  135. maxframe/lib/wrapped_pickle.py +2 -1
  136. maxframe/opcodes.py +11 -0
  137. maxframe/protocol.py +154 -27
  138. maxframe/remote/core.py +4 -8
  139. maxframe/serialization/__init__.py +1 -0
  140. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  141. maxframe/serialization/core.pxd +3 -0
  142. maxframe/serialization/core.pyi +64 -0
  143. maxframe/serialization/core.pyx +67 -26
  144. maxframe/serialization/exception.py +1 -1
  145. maxframe/serialization/pandas.py +52 -17
  146. maxframe/serialization/serializables/core.py +180 -15
  147. maxframe/serialization/serializables/field_type.py +4 -1
  148. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  149. maxframe/serialization/tests/test_serial.py +2 -1
  150. maxframe/session.py +37 -2
  151. maxframe/tensor/__init__.py +81 -2
  152. maxframe/tensor/arithmetic/isclose.py +1 -0
  153. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  154. maxframe/tensor/core.py +5 -136
  155. maxframe/tensor/datasource/array.py +7 -2
  156. maxframe/tensor/datasource/full.py +1 -1
  157. maxframe/tensor/datasource/scalar.py +1 -1
  158. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  159. maxframe/tensor/indexing/flatnonzero.py +1 -1
  160. maxframe/tensor/indexing/getitem.py +2 -0
  161. maxframe/tensor/merge/__init__.py +2 -0
  162. maxframe/tensor/merge/concatenate.py +101 -0
  163. maxframe/tensor/merge/tests/test_merge.py +30 -1
  164. maxframe/tensor/merge/vstack.py +74 -0
  165. maxframe/tensor/{base → misc}/__init__.py +4 -0
  166. maxframe/tensor/misc/atleast_1d.py +72 -0
  167. maxframe/tensor/misc/atleast_2d.py +70 -0
  168. maxframe/tensor/misc/atleast_3d.py +85 -0
  169. maxframe/tensor/misc/tests/__init__.py +13 -0
  170. maxframe/tensor/{base → misc}/transpose.py +22 -18
  171. maxframe/tensor/misc/unique.py +205 -0
  172. maxframe/tensor/operators.py +1 -7
  173. maxframe/tensor/random/core.py +1 -1
  174. maxframe/tensor/reduction/count_nonzero.py +2 -1
  175. maxframe/tensor/reduction/mean.py +1 -0
  176. maxframe/tensor/reduction/nanmean.py +1 -0
  177. maxframe/tensor/reduction/nanvar.py +2 -0
  178. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  179. maxframe/tensor/reduction/var.py +2 -0
  180. maxframe/tensor/statistics/quantile.py +2 -2
  181. maxframe/tensor/utils.py +2 -22
  182. maxframe/tests/test_protocol.py +34 -0
  183. maxframe/tests/test_utils.py +0 -12
  184. maxframe/tests/utils.py +17 -2
  185. maxframe/typing_.py +4 -1
  186. maxframe/udf.py +62 -3
  187. maxframe/utils.py +112 -86
  188. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +4 -4
  189. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
  190. maxframe_client/__init__.py +0 -1
  191. maxframe_client/clients/framedriver.py +4 -1
  192. maxframe_client/fetcher.py +123 -54
  193. maxframe_client/session/consts.py +3 -0
  194. maxframe_client/session/graph.py +8 -2
  195. maxframe_client/session/odps.py +223 -40
  196. maxframe_client/session/task.py +108 -80
  197. maxframe_client/tests/test_fetcher.py +21 -3
  198. maxframe_client/tests/test_session.py +136 -8
  199. maxframe/core/entity/chunks.py +0 -68
  200. maxframe/core/entity/fuse.py +0 -73
  201. maxframe/core/graph/builder/chunk.py +0 -430
  202. maxframe/odpsio/tableio.py +0 -300
  203. maxframe/odpsio/volumeio.py +0 -95
  204. maxframe_client/clients/spe.py +0 -104
  205. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  206. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  207. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  208. /maxframe/tensor/{base → misc}/astype.py +0 -0
  209. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  210. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  211. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  212. /maxframe/tensor/{base → misc}/where.py +0 -0
  213. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +0 -0
  214. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -12,63 +12,57 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict
15
+ from typing import Any, Dict, Type
16
16
 
17
- from ...serialization.serializables import FieldTypes, ListField
18
- from ...utils import skip_na_call
19
- from .chunks import Chunk, ChunkData
17
+ from ...serialization import load_type
18
+ from ...serialization.serializables import StringField
20
19
  from .core import Entity
21
20
  from .executable import _ToObjectMixin
22
21
  from .tileables import TileableData
23
22
 
24
23
 
25
- class ObjectChunkData(ChunkData):
26
- # chunk whose data could be any serializable
24
+ class ObjectData(TileableData, _ToObjectMixin):
27
25
  __slots__ = ()
28
26
  type_name = "Object"
27
+ # workaround for removed field since v0.1.0b5
28
+ # todo remove this when all versions below v1.0.0rc1 is eliminated
29
+ _legacy_deprecated_non_primitives = ["_chunks"]
30
+ _legacy_new_non_primitives = ["object_class"]
29
31
 
30
- def __init__(self, op=None, index=None, **kw):
31
- super().__init__(_op=op, _index=index, **kw)
32
-
33
- @property
34
- def params(self) -> Dict[str, Any]:
35
- # params return the properties which useful to rebuild a new chunk
36
- return {
37
- "index": self.index,
38
- }
39
-
40
- @params.setter
41
- def params(self, new_params: Dict[str, Any]):
42
- params = new_params.copy()
43
- params.pop("index", None) # index not needed to update
44
- if params: # pragma: no cover
45
- raise TypeError(f"Unknown params: {list(params)}")
32
+ object_class = StringField("object_class", default=None)
46
33
 
47
34
  @classmethod
48
- def get_params_from_data(cls, data: Any) -> Dict[str, Any]:
49
- return dict()
50
-
51
-
52
- class ObjectChunk(Chunk):
53
- __slots__ = ()
54
- _allow_data_type_ = (ObjectChunkData,)
55
- type_name = "Object"
56
-
57
-
58
- class ObjectData(TileableData, _ToObjectMixin):
59
- __slots__ = ()
60
- type_name = "Object"
61
-
62
- # optional fields
63
- _chunks = ListField(
64
- "chunks",
65
- FieldTypes.reference(ObjectChunkData),
66
- on_serialize=skip_na_call(lambda x: [it.data for it in x]),
67
- on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
68
- )
35
+ def get_entity_class(cls) -> Type["Object"]:
36
+ if getattr(cls, "_entity_class", None) is not None:
37
+ return cls._entity_class
38
+ assert cls.__qualname__[-4:] == "Data"
39
+ target_class_name = cls.__module__ + "#" + cls.__qualname__[:-4]
40
+ cls._entity_class = load_type(target_class_name, Object)
41
+ return cls._entity_class
42
+
43
+ def __new__(cls, op=None, nsplits=None, **kw):
44
+ if cls is ObjectData:
45
+ obj_cls = kw.get("object_class")
46
+ if isinstance(obj_cls, str):
47
+ obj_cls = load_type(obj_cls, (Object, ObjectData))
48
+ if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
49
+ obj_cls = obj_cls.get_data_class()
50
+
51
+ if obj_cls is not None and cls is not obj_cls:
52
+ return obj_cls(op=op, nsplits=nsplits, **kw)
53
+ return super().__new__(cls)
69
54
 
70
55
  def __init__(self, op=None, nsplits=None, **kw):
56
+ obj_cls = kw.pop("object_class", None)
57
+ if isinstance(obj_cls, type):
58
+ if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
59
+ obj_cls = obj_cls.get_data_class()
60
+ kw["object_class"] = obj_cls.__module__ + "#" + obj_cls.__qualname__
61
+
71
62
  super().__init__(_op=op, _nsplits=nsplits, **kw)
63
+ if self.object_class is None and type(self) is not ObjectData:
64
+ cls = type(self)
65
+ self.object_class = cls.__module__ + "#" + cls.__qualname__
72
66
 
73
67
  def __repr__(self):
74
68
  return f"Object <op={type(self.op).__name__}, key={self.key}>"
@@ -76,7 +70,7 @@ class ObjectData(TileableData, _ToObjectMixin):
76
70
  @property
77
71
  def params(self):
78
72
  # params return the properties which useful to rebuild a new tileable object
79
- return dict()
73
+ return dict(object_class=self.object_class)
80
74
 
81
75
  @params.setter
82
76
  def params(self, new_params: Dict[str, Any]):
@@ -95,6 +89,13 @@ class Object(Entity, _ToObjectMixin):
95
89
  _allow_data_type_ = (ObjectData,)
96
90
  type_name = "Object"
97
91
 
92
+ @classmethod
93
+ def get_data_class(cls) -> Type[ObjectData]:
94
+ if getattr(cls, "_data_class", None) is not None:
95
+ return cls._data_class
96
+ target_class_name = cls.__module__ + "#" + cls.__qualname__ + "Data"
97
+ cls._data_class = load_type(target_class_name, ObjectData)
98
+ return cls._data_class
99
+
98
100
 
99
101
  OBJECT_TYPE = (Object, ObjectData)
100
- OBJECT_CHUNK_TYPE = (ObjectChunk, ObjectChunkData)
@@ -15,7 +15,6 @@
15
15
  import functools
16
16
  from enum import Enum
17
17
 
18
- from .fuse import FUSE_CHUNK_TYPE
19
18
  from .objects import OBJECT_TYPE
20
19
 
21
20
 
@@ -77,8 +76,6 @@ def get_output_types(*objs, unknown_as=None):
77
76
  for obj in objs:
78
77
  if obj is None:
79
78
  continue
80
- elif isinstance(obj, FUSE_CHUNK_TYPE):
81
- obj = obj.chunk
82
79
 
83
80
  try:
84
81
  output_types.append(_get_output_type_by_cls(type(obj)))
@@ -0,0 +1,43 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..objects import Object, ObjectData
16
+
17
+
18
+ class TestSubObjectData(ObjectData):
19
+ __test__ = False
20
+
21
+
22
+ class TestSubObject(Object):
23
+ __test__ = False
24
+
25
+
26
+ def test_object_init():
27
+ assert TestSubObjectData.get_entity_class() is TestSubObject
28
+
29
+ obj = ObjectData(
30
+ object_class=TestSubObjectData.__module__ + "#" + TestSubObjectData.__name__
31
+ )
32
+ assert isinstance(obj, TestSubObjectData)
33
+
34
+ obj = ObjectData(object_class=TestSubObjectData)
35
+ assert isinstance(obj, TestSubObjectData)
36
+
37
+ obj = ObjectData(
38
+ object_class=TestSubObject.__module__ + "#" + TestSubObject.__name__
39
+ )
40
+ assert isinstance(obj, TestSubObjectData)
41
+
42
+ obj = ObjectData(object_class=TestSubObject)
43
+ assert isinstance(obj, TestSubObjectData)
@@ -15,17 +15,15 @@
15
15
  import builtins
16
16
  import itertools
17
17
  from operator import attrgetter
18
- from typing import Callable, List
19
18
  from weakref import WeakKeyDictionary, WeakSet
20
19
 
21
20
  import numpy as np
22
21
 
23
22
  from ...serialization.serializables import BoolField, FieldTypes, TupleField
24
- from ...typing_ import OperatorType, TileableType
23
+ from ...typing_ import TileableType
25
24
  from ...utils import on_deserialize_shape, on_serialize_nsplits, on_serialize_shape
26
25
  from ..base import Base
27
26
  from ..mode import enter_mode
28
- from .chunks import Chunk
29
27
  from .core import Entity, EntityData
30
28
  from .executable import _ExecutableMixin
31
29
 
@@ -34,79 +32,6 @@ class NotSupportTile(Exception):
34
32
  pass
35
33
 
36
34
 
37
- class OperatorTilesHandler:
38
- _handlers = dict()
39
-
40
- @classmethod
41
- def _get_op_cls(cls, op: OperatorType):
42
- if isinstance(op, type):
43
- return op
44
- return type(op)
45
-
46
- @classmethod
47
- def register(
48
- cls, op: OperatorType, tile_handler: Callable[[OperatorType], TileableType]
49
- ):
50
- cls._handlers[cls._get_op_cls(op)] = tile_handler
51
-
52
- @classmethod
53
- def unregister(cls, op: OperatorType):
54
- del cls._handlers[cls._get_op_cls(op)]
55
-
56
- @classmethod
57
- def get_handler(
58
- cls, op: OperatorType
59
- ) -> Callable[[OperatorType], List[TileableType]]:
60
- op_cls = cls._get_op_cls(op)
61
- return cls._handlers.get(op_cls, op_cls.tile)
62
-
63
- @classmethod
64
- def _assign_to(
65
- cls,
66
- tile_after_tensor_datas: List["TileableData"],
67
- tile_before_tensor_datas: List["TileableData"],
68
- ):
69
- assert len(tile_after_tensor_datas) == len(tile_before_tensor_datas)
70
-
71
- for tile_after_tensor_data, tile_before_tensor_data in zip(
72
- tile_after_tensor_datas, tile_before_tensor_datas
73
- ):
74
- if tile_before_tensor_data is None:
75
- # garbage collected
76
- continue
77
- tile_after_tensor_data.copy_to(tile_before_tensor_data)
78
- tile_before_tensor_data.op.outputs = tile_before_tensor_datas
79
-
80
- @enter_mode(kernel=True)
81
- def dispatch(self, op: OperatorType):
82
- op_cls = self._get_op_cls(op)
83
- tiled = None
84
- cause = None
85
-
86
- if op_cls in self._handlers:
87
- tiled = self._handlers[op_cls](op)
88
- else:
89
- try:
90
- tiled = op_cls.tile(op)
91
- except NotImplementedError as ex:
92
- cause = ex
93
- for super_cls in op_cls.__mro__:
94
- if super_cls in self._handlers:
95
- h = self._handlers[op_cls] = self._handlers[super_cls]
96
- tiled = h(op)
97
- break
98
-
99
- if tiled is not None:
100
- return tiled if isinstance(tiled, list) else [tiled]
101
- else:
102
- raise NotImplementedError(f"{type(op)} does not support tile") from cause
103
-
104
-
105
- handler = OperatorTilesHandler()
106
- register = OperatorTilesHandler.register
107
- unregister = OperatorTilesHandler.unregister
108
-
109
-
110
35
  class _ChunksIndexer:
111
36
  __slots__ = ("_tileable",)
112
37
 
@@ -231,7 +156,7 @@ entity_view_handler = EntityDataModificationHandler()
231
156
 
232
157
 
233
158
  class TileableData(EntityData, _ExecutableMixin):
234
- __slots__ = "_cix", "_entities", "_executed_sessions"
159
+ __slots__ = "_chunks", "_cix", "_entities", "_executed_sessions"
235
160
  _no_copy_attrs_ = Base._no_copy_attrs_ | {"_cix"}
236
161
 
237
162
  # optional fields
@@ -245,6 +170,8 @@ class TileableData(EntityData, _ExecutableMixin):
245
170
  cache = BoolField("cache", default=False)
246
171
 
247
172
  def __init__(self: TileableType, *args, **kwargs):
173
+ if kwargs.get("chunks") is not None:
174
+ self._chunks = kwargs.pop("chunks")
248
175
  if kwargs.get("_nsplits", None) is not None:
249
176
  kwargs["_nsplits"] = tuple(tuple(s) for s in kwargs["_nsplits"])
250
177
 
@@ -270,7 +197,7 @@ class TileableData(EntityData, _ExecutableMixin):
270
197
  return tuple(map(len, self._nsplits))
271
198
 
272
199
  @property
273
- def chunks(self) -> List[Chunk]:
200
+ def chunks(self) -> list:
274
201
  return getattr(self, "_chunks", None)
275
202
 
276
203
  @property
@@ -12,6 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .builder import ChunkGraphBuilder, TileableGraphBuilder, TileContext, TileStatus
15
+ from .builder import TileableGraphBuilder
16
16
  from .core import DAG, DirectedGraph, GraphContainsCycleError
17
- from .entity import ChunkGraph, EntityGraph, GraphSerializer, TileableGraph
17
+ from .entity import EntityGraph, GraphSerializer, TileableGraph
@@ -12,5 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .chunk import ChunkGraphBuilder, TileContext, TileStatus
16
15
  from .tileable import TileableGraphBuilder
@@ -14,10 +14,10 @@
14
14
 
15
15
 
16
16
  from abc import ABC, abstractmethod
17
- from typing import Generator, List, Set, Union
17
+ from typing import Generator, List, Set
18
18
 
19
19
  from ....typing_ import EntityType
20
- from ..entity import ChunkGraph, EntityGraph, TileableGraph
20
+ from ..entity import EntityGraph
21
21
 
22
22
 
23
23
  def _default_inputs_selector(inputs: List[EntityType]) -> List[EntityType]:
@@ -43,7 +43,7 @@ class AbstractGraphBuilder(ABC):
43
43
 
44
44
  def _add_nodes(
45
45
  self,
46
- graph: Union[ChunkGraph, TileableGraph],
46
+ graph: EntityGraph,
47
47
  nodes: List[EntityType],
48
48
  visited: Set,
49
49
  ):
@@ -75,7 +75,7 @@ class AbstractGraphBuilder(ABC):
75
75
  nodes.append(out)
76
76
 
77
77
  @abstractmethod
78
- def build(self) -> Generator[Union[EntityGraph, ChunkGraph], None, None]:
78
+ def build(self) -> Generator[EntityGraph, None, None]:
79
79
  """
80
80
  Build a entity graph.
81
81
 
@@ -84,3 +84,4 @@ class AbstractGraphBuilder(ABC):
84
84
  graph : EntityGraph
85
85
  Entity graph.
86
86
  """
87
+ raise NotImplementedError
@@ -12,10 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Generator, Union
15
+ from typing import Generator
16
16
 
17
17
  from ...mode import enter_mode
18
- from ..entity import ChunkGraph, TileableGraph
18
+ from ..entity import TileableGraph
19
19
  from .base import AbstractGraphBuilder
20
20
 
21
21
 
@@ -26,9 +26,9 @@ class TileableGraphBuilder(AbstractGraphBuilder):
26
26
  super().__init__(graph=graph)
27
27
 
28
28
  @enter_mode(build=True, kernel=True)
29
- def _build(self) -> Union[TileableGraph, ChunkGraph]:
29
+ def _build(self) -> TileableGraph:
30
30
  self._add_nodes(self._graph, list(self._graph.result_tileables), set())
31
31
  return self._graph
32
32
 
33
- def build(self) -> Generator[Union[TileableGraph, ChunkGraph], None, None]:
33
+ def build(self) -> Generator[TileableGraph, None, None]:
34
34
  yield self._build()
@@ -13,12 +13,11 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import itertools
16
- from typing import List, Union
16
+ from typing import List
17
17
 
18
18
  from ....typing_ import TileableType
19
19
  from ...mode import enter_mode
20
- from ..entity import ChunkGraph, TileableGraph
21
- from .chunk import ChunkGraphBuilder
20
+ from ..entity import EntityGraph, TileableGraph
22
21
  from .tileable import TileableGraphBuilder
23
22
 
24
23
 
@@ -28,14 +27,11 @@ def build_graph(
28
27
  tile: bool = False,
29
28
  fuse_enabled: bool = True,
30
29
  **chunk_graph_build_kwargs
31
- ) -> Union[TileableGraph, ChunkGraph]:
30
+ ) -> EntityGraph:
32
31
  tileables = list(itertools.chain(*(tileable.op.outputs for tileable in tileables)))
33
32
  tileable_graph = TileableGraph(tileables)
34
33
  tileable_graph_builder = TileableGraphBuilder(tileable_graph)
35
34
  tileable_graph = next(tileable_graph_builder.build())
36
35
  if not tile:
37
36
  return tileable_graph
38
- chunk_graph_builder = ChunkGraphBuilder(
39
- tileable_graph, fuse_enabled=fuse_enabled, **chunk_graph_build_kwargs
40
- )
41
- return next(chunk_graph_builder.build())
37
+ raise NotImplementedError
Binary file
@@ -354,10 +354,10 @@ cdef class DirectedGraph:
354
354
  sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" {chunk_style}\n')
355
355
  visited.add(input_chunk.key)
356
356
  if op.key not in visited:
357
- sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
357
+ sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
358
358
  visited.add(op.key)
359
359
  sio.write(f'"Chunk:{self._gen_chunk_key(input_chunk, trunc_key)}" -> '
360
- f'"{op_name}:{op.key[:trunc_key]}"\n')
360
+ f'"{op_name}:{op.key[:trunc_key]}_{id(op)}"\n')
361
361
 
362
362
  for output_chunk in (op.outputs or []):
363
363
  if output_chunk.key not in visited:
@@ -367,9 +367,9 @@ cdef class DirectedGraph:
367
367
  sio.write(f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}" {tmp_chunk_style}\n')
368
368
  visited.add(output_chunk.key)
369
369
  if op.key not in visited:
370
- sio.write(f'"{op_name}:{op.key[:trunc_key]}" {operator_style}\n')
370
+ sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" {operator_style}\n')
371
371
  visited.add(op.key)
372
- sio.write(f'"{op_name}:{op.key[:trunc_key]}" -> '
372
+ sio.write(f'"{op_name}:{op.key[:trunc_key]}_{id(op)}" -> '
373
373
  f'"Chunk:{self._gen_chunk_key(output_chunk, trunc_key)}"')
374
374
  if show_columns:
375
375
  sio.write(f' [ label={get_col_names(output_chunk)} ]')
@@ -13,9 +13,9 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from abc import ABCMeta, abstractmethod
16
- from typing import Dict, Iterable, List, Union
16
+ from typing import Dict, Iterable, List
17
17
 
18
- from ...core import Chunk, Tileable
18
+ from ...core import Tileable
19
19
  from ...serialization.core import buffered
20
20
  from ...serialization.serializables import BoolField, DictField, ListField, Serializable
21
21
  from ...serialization.serializables.core import SerializableSerializer
@@ -97,26 +97,6 @@ class TileableGraph(EntityGraph, Iterable[Tileable]):
97
97
  return self._logic_key
98
98
 
99
99
 
100
- class ChunkGraph(EntityGraph, Iterable[Chunk]):
101
- _result_chunks: List[Chunk]
102
-
103
- def __init__(self, result_chunks: List[Chunk] = None):
104
- super().__init__()
105
- self._result_chunks = result_chunks
106
-
107
- @property
108
- def result_chunks(self):
109
- return self._result_chunks
110
-
111
- @property
112
- def results(self):
113
- return self._result_chunks
114
-
115
- @results.setter
116
- def results(self, new_results):
117
- self._result_chunks = new_results
118
-
119
-
120
100
  class SerializableGraph(Serializable):
121
101
  _is_chunk = BoolField("is_chunk")
122
102
  # TODO(qinxuye): remove this logic when we handle fetch elegantly,
@@ -132,12 +112,11 @@ class SerializableGraph(Serializable):
132
112
  _results = ListField("results")
133
113
 
134
114
  @classmethod
135
- def from_graph(cls, graph: Union[TileableGraph, ChunkGraph]) -> "SerializableGraph":
115
+ def from_graph(cls, graph: EntityGraph) -> "SerializableGraph":
136
116
  from ..operator import Fetch
137
117
 
138
- is_chunk = isinstance(graph, ChunkGraph)
139
118
  return SerializableGraph(
140
- _is_chunk=is_chunk,
119
+ _is_chunk=False,
141
120
  _fetch_nodes=[chunk for chunk in graph if isinstance(chunk.op, Fetch)],
142
121
  _nodes=graph._nodes,
143
122
  _predecessors=graph._predecessors,
@@ -145,9 +124,8 @@ class SerializableGraph(Serializable):
145
124
  _results=graph.results,
146
125
  )
147
126
 
148
- def to_graph(self) -> Union[TileableGraph, ChunkGraph]:
149
- graph_cls = ChunkGraph if self._is_chunk else TileableGraph
150
- graph = graph_cls(self._results)
127
+ def to_graph(self) -> EntityGraph:
128
+ graph = TileableGraph(self._results)
151
129
  graph._nodes.update(self._nodes)
152
130
  graph._predecessors.update(self._predecessors)
153
131
  graph._successors.update(self._successors)
@@ -156,14 +134,12 @@ class SerializableGraph(Serializable):
156
134
 
157
135
  class GraphSerializer(SerializableSerializer):
158
136
  @buffered
159
- def serial(self, obj: Union[TileableGraph, ChunkGraph], context: Dict):
137
+ def serial(self, obj: EntityGraph, context: Dict):
160
138
  serializable_graph = SerializableGraph.from_graph(obj)
161
139
  return [], [serializable_graph], False
162
140
 
163
- def deserial(
164
- self, serialized: List, context: Dict, subs: List
165
- ) -> Union[TileableGraph, ChunkGraph]:
166
- serializable_graph: SerializableGraph = subs[0]
141
+ def deserial(self, serialized: List, context: Dict, subs: List) -> TileableGraph:
142
+ serializable_graph: EntityGraph = subs[0]
167
143
  return serializable_graph.to_graph()
168
144
 
169
145
 
@@ -22,13 +22,6 @@ from .base import (
22
22
  )
23
23
  from .core import TileableOperatorMixin, estimate_size, execute
24
24
  from .fetch import Fetch, FetchMixin, FetchShuffle, ShuffleFetchType
25
- from .fuse import Fuse, FuseChunkMixin
26
- from .objects import (
27
- MergeDictOperator,
28
- ObjectFetch,
29
- ObjectFuseChunk,
30
- ObjectFuseChunkMixin,
31
- ObjectOperator,
32
- ObjectOperatorMixin,
33
- )
25
+ from .objects import MergeDictOperator, ObjectFetch, ObjectOperator, ObjectOperatorMixin
34
26
  from .shuffle import MapReduceOperator, ShuffleProxy
27
+ from .utils import add_fetch_builder, build_fetch
@@ -12,11 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import functools
16
15
  import weakref
17
16
  from copy import deepcopy
18
17
  from enum import Enum
19
- from functools import partial
18
+ from functools import lru_cache, partial
20
19
  from typing import Any, Dict, List, Optional, Tuple, Type, Union
21
20
 
22
21
  from ...serialization.core import Placeholder
@@ -37,7 +36,6 @@ from ...serialization.serializables.core import SerializableSerializer
37
36
  from ...typing_ import OperatorType
38
37
  from ...utils import AttributeDict, classproperty, get_user_call_point, tokenize
39
38
  from ..base import Base
40
- from ..entity.chunks import Chunk
41
39
  from ..entity.core import ENTITY_TYPE, Entity, EntityData
42
40
  from ..entity.output_types import OutputType
43
41
  from ..entity.tileables import Tileable
@@ -90,7 +88,7 @@ class SchedulingHint(Serializable):
90
88
  priority = Int32Field("priority", default=None)
91
89
 
92
90
  @classproperty
93
- @functools.lru_cache(1)
91
+ @lru_cache(1)
94
92
  def all_hint_names(cls):
95
93
  return list(cls._FIELDS)
96
94
 
@@ -341,7 +339,7 @@ class Operator(Base, OperatorLogicKeyGeneratorMixin, metaclass=OperatorMetaclass
341
339
  raise ValueError("Outputs' size exceeds limitation")
342
340
 
343
341
  @property
344
- def outputs(self) -> List[Union[Chunk, Tileable]]:
342
+ def outputs(self) -> List[Tileable]:
345
343
  outputs = self._outputs
346
344
  if outputs:
347
345
  return [ref() for ref in outputs]
@@ -17,7 +17,6 @@ from ..entity import OutputType, register_fetch_class
17
17
  from .base import Operator
18
18
  from .core import TileableOperatorMixin
19
19
  from .fetch import Fetch, FetchMixin
20
- from .fuse import Fuse, FuseChunkMixin
21
20
 
22
21
 
23
22
  class ObjectOperator(Operator):
@@ -28,14 +27,6 @@ class ObjectOperatorMixin(TileableOperatorMixin):
28
27
  _output_type_ = OutputType.object
29
28
 
30
29
 
31
- class ObjectFuseChunkMixin(FuseChunkMixin, ObjectOperatorMixin):
32
- __slots__ = ()
33
-
34
-
35
- class ObjectFuseChunk(ObjectFuseChunkMixin, Fuse):
36
- pass
37
-
38
-
39
30
  class ObjectFetch(FetchMixin, ObjectOperatorMixin, Fetch):
40
31
  _output_type_ = OutputType.object
41
32
 
@@ -0,0 +1,55 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...typing_ import EntityType, TileableType
16
+ from ..entity import TILEABLE_TYPE
17
+
18
+
19
+ def build_fetch_tileable(tileable: TileableType) -> TileableType:
20
+ if tileable.is_coarse():
21
+ chunks = None
22
+ else:
23
+ chunks = []
24
+ for c in tileable.chunks:
25
+ fetch_chunk = build_fetch(c, index=c.index)
26
+ chunks.append(fetch_chunk)
27
+
28
+ tileable_op = tileable.op
29
+ params = tileable.params.copy()
30
+
31
+ new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
32
+ return new_op.new_tileables(
33
+ None,
34
+ chunks=chunks,
35
+ nsplits=tileable.nsplits,
36
+ _key=tileable.key,
37
+ _id=tileable.id,
38
+ **params,
39
+ )[0]
40
+
41
+
42
+ _type_to_builder = [
43
+ (TILEABLE_TYPE, build_fetch_tileable),
44
+ ]
45
+
46
+
47
+ def build_fetch(entity: EntityType, **kw) -> EntityType:
48
+ for entity_types, func in _type_to_builder:
49
+ if isinstance(entity, entity_types):
50
+ return func(entity, **kw)
51
+ raise TypeError(f"Type {type(entity)} not supported")
52
+
53
+
54
+ def add_fetch_builder(entity_type, builder_func):
55
+ _type_to_builder.append((entity_type, builder_func))