maxframe-0.1.0b5-cp38-cp38-win32.whl → maxframe-1.0.0-cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of maxframe has been flagged as potentially problematic.

Files changed (203)
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/codegen.py +10 -4
  3. maxframe/config/config.py +68 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +58 -14
  6. maxframe/core/__init__.py +2 -16
  7. maxframe/core/entity/__init__.py +1 -12
  8. maxframe/core/entity/executable.py +1 -1
  9. maxframe/core/entity/objects.py +46 -45
  10. maxframe/core/entity/output_types.py +0 -3
  11. maxframe/core/entity/tests/test_objects.py +43 -0
  12. maxframe/core/entity/tileables.py +5 -78
  13. maxframe/core/graph/__init__.py +2 -2
  14. maxframe/core/graph/builder/__init__.py +0 -1
  15. maxframe/core/graph/builder/base.py +5 -4
  16. maxframe/core/graph/builder/tileable.py +4 -4
  17. maxframe/core/graph/builder/utils.py +4 -8
  18. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  19. maxframe/core/graph/core.pyx +4 -4
  20. maxframe/core/graph/entity.py +9 -33
  21. maxframe/core/operator/__init__.py +2 -9
  22. maxframe/core/operator/base.py +3 -5
  23. maxframe/core/operator/objects.py +0 -9
  24. maxframe/core/operator/utils.py +55 -0
  25. maxframe/dataframe/__init__.py +1 -1
  26. maxframe/dataframe/arithmetic/around.py +5 -17
  27. maxframe/dataframe/arithmetic/core.py +15 -7
  28. maxframe/dataframe/arithmetic/docstring.py +7 -33
  29. maxframe/dataframe/arithmetic/equal.py +4 -2
  30. maxframe/dataframe/arithmetic/greater.py +4 -2
  31. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  32. maxframe/dataframe/arithmetic/less.py +2 -2
  33. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  34. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  36. maxframe/dataframe/core.py +31 -7
  37. maxframe/dataframe/datasource/date_range.py +2 -2
  38. maxframe/dataframe/datasource/read_odps_query.py +117 -23
  39. maxframe/dataframe/datasource/read_odps_table.py +6 -3
  40. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  41. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  42. maxframe/dataframe/datastore/to_odps.py +28 -0
  43. maxframe/dataframe/extensions/__init__.py +5 -0
  44. maxframe/dataframe/extensions/flatjson.py +131 -0
  45. maxframe/dataframe/extensions/flatmap.py +317 -0
  46. maxframe/dataframe/extensions/reshuffle.py +1 -1
  47. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  48. maxframe/dataframe/groupby/core.py +1 -1
  49. maxframe/dataframe/groupby/cum.py +0 -1
  50. maxframe/dataframe/groupby/fill.py +4 -1
  51. maxframe/dataframe/groupby/getitem.py +6 -0
  52. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  53. maxframe/dataframe/groupby/transform.py +5 -1
  54. maxframe/dataframe/indexing/align.py +1 -1
  55. maxframe/dataframe/indexing/loc.py +6 -4
  56. maxframe/dataframe/indexing/rename.py +5 -28
  57. maxframe/dataframe/indexing/sample.py +0 -1
  58. maxframe/dataframe/indexing/set_index.py +68 -1
  59. maxframe/dataframe/initializer.py +11 -1
  60. maxframe/dataframe/merge/__init__.py +9 -1
  61. maxframe/dataframe/merge/concat.py +41 -31
  62. maxframe/dataframe/merge/merge.py +237 -3
  63. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  64. maxframe/dataframe/misc/apply.py +5 -10
  65. maxframe/dataframe/misc/case_when.py +1 -1
  66. maxframe/dataframe/misc/describe.py +2 -2
  67. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  68. maxframe/dataframe/misc/eval.py +4 -0
  69. maxframe/dataframe/misc/memory_usage.py +2 -2
  70. maxframe/dataframe/misc/pct_change.py +1 -83
  71. maxframe/dataframe/misc/tests/test_misc.py +33 -2
  72. maxframe/dataframe/misc/transform.py +1 -30
  73. maxframe/dataframe/misc/value_counts.py +4 -17
  74. maxframe/dataframe/missing/dropna.py +1 -1
  75. maxframe/dataframe/missing/fillna.py +5 -5
  76. maxframe/dataframe/operators.py +1 -17
  77. maxframe/dataframe/reduction/core.py +2 -2
  78. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  79. maxframe/dataframe/sort/sort_values.py +1 -11
  80. maxframe/dataframe/statistics/corr.py +3 -3
  81. maxframe/dataframe/statistics/quantile.py +13 -19
  82. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  83. maxframe/dataframe/tests/test_initializer.py +33 -2
  84. maxframe/dataframe/utils.py +26 -11
  85. maxframe/dataframe/window/expanding.py +5 -3
  86. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  87. maxframe/errors.py +13 -0
  88. maxframe/extension.py +12 -0
  89. maxframe/io/__init__.py +13 -0
  90. maxframe/io/objects/__init__.py +24 -0
  91. maxframe/io/objects/core.py +140 -0
  92. maxframe/io/objects/tensor.py +76 -0
  93. maxframe/io/objects/tests/__init__.py +13 -0
  94. maxframe/io/objects/tests/test_object_io.py +97 -0
  95. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  96. maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
  97. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  98. maxframe/io/odpsio/tableio.py +719 -0
  99. maxframe/io/odpsio/tests/__init__.py +13 -0
  100. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
  101. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  102. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  103. maxframe/io/odpsio/volumeio.py +63 -0
  104. maxframe/learn/contrib/__init__.py +3 -1
  105. maxframe/learn/contrib/graph/__init__.py +15 -0
  106. maxframe/learn/contrib/graph/connected_components.py +215 -0
  107. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  108. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  109. maxframe/learn/contrib/llm/__init__.py +16 -0
  110. maxframe/learn/contrib/llm/core.py +54 -0
  111. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  112. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  113. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  114. maxframe/learn/contrib/llm/text.py +42 -0
  115. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  116. maxframe/learn/contrib/xgboost/core.py +87 -2
  117. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  118. maxframe/learn/contrib/xgboost/predict.py +29 -46
  119. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  120. maxframe/learn/contrib/xgboost/train.py +29 -18
  121. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  122. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  123. maxframe/lib/mmh3.pyi +43 -0
  124. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  125. maxframe/lib/wrapped_pickle.py +2 -1
  126. maxframe/opcodes.py +8 -0
  127. maxframe/protocol.py +154 -27
  128. maxframe/remote/core.py +4 -8
  129. maxframe/serialization/__init__.py +1 -0
  130. maxframe/serialization/core.cp38-win32.pyd +0 -0
  131. maxframe/serialization/core.pxd +3 -0
  132. maxframe/serialization/core.pyi +3 -0
  133. maxframe/serialization/core.pyx +67 -26
  134. maxframe/serialization/exception.py +1 -1
  135. maxframe/serialization/pandas.py +52 -17
  136. maxframe/serialization/serializables/core.py +180 -15
  137. maxframe/serialization/serializables/field_type.py +4 -1
  138. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  139. maxframe/serialization/tests/test_serial.py +2 -1
  140. maxframe/session.py +9 -2
  141. maxframe/tensor/__init__.py +81 -2
  142. maxframe/tensor/arithmetic/isclose.py +1 -0
  143. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  144. maxframe/tensor/core.py +5 -136
  145. maxframe/tensor/datasource/array.py +3 -0
  146. maxframe/tensor/datasource/full.py +1 -1
  147. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  148. maxframe/tensor/indexing/flatnonzero.py +1 -1
  149. maxframe/tensor/indexing/getitem.py +2 -0
  150. maxframe/tensor/merge/__init__.py +2 -0
  151. maxframe/tensor/merge/concatenate.py +101 -0
  152. maxframe/tensor/merge/tests/test_merge.py +30 -1
  153. maxframe/tensor/merge/vstack.py +74 -0
  154. maxframe/tensor/{base → misc}/__init__.py +2 -0
  155. maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
  156. maxframe/tensor/misc/atleast_2d.py +70 -0
  157. maxframe/tensor/misc/atleast_3d.py +85 -0
  158. maxframe/tensor/misc/tests/__init__.py +13 -0
  159. maxframe/tensor/{base → misc}/transpose.py +22 -18
  160. maxframe/tensor/{base → misc}/unique.py +3 -3
  161. maxframe/tensor/operators.py +1 -7
  162. maxframe/tensor/random/core.py +1 -1
  163. maxframe/tensor/reduction/count_nonzero.py +2 -1
  164. maxframe/tensor/reduction/mean.py +1 -0
  165. maxframe/tensor/reduction/nanmean.py +1 -0
  166. maxframe/tensor/reduction/nanvar.py +2 -0
  167. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  168. maxframe/tensor/reduction/var.py +2 -0
  169. maxframe/tensor/statistics/quantile.py +2 -2
  170. maxframe/tensor/utils.py +2 -22
  171. maxframe/tests/test_protocol.py +34 -0
  172. maxframe/tests/test_utils.py +0 -12
  173. maxframe/tests/utils.py +17 -2
  174. maxframe/typing_.py +4 -1
  175. maxframe/udf.py +8 -9
  176. maxframe/utils.py +106 -86
  177. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  178. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
  179. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  180. maxframe_client/__init__.py +0 -1
  181. maxframe_client/clients/framedriver.py +4 -1
  182. maxframe_client/fetcher.py +81 -74
  183. maxframe_client/session/consts.py +3 -0
  184. maxframe_client/session/graph.py +8 -2
  185. maxframe_client/session/odps.py +194 -40
  186. maxframe_client/session/task.py +94 -39
  187. maxframe_client/tests/test_fetcher.py +21 -3
  188. maxframe_client/tests/test_session.py +109 -8
  189. maxframe/core/entity/chunks.py +0 -68
  190. maxframe/core/entity/fuse.py +0 -73
  191. maxframe/core/graph/builder/chunk.py +0 -430
  192. maxframe/odpsio/tableio.py +0 -322
  193. maxframe/odpsio/volumeio.py +0 -95
  194. maxframe_client/clients/spe.py +0 -104
  195. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  196. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  197. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  198. /maxframe/tensor/{base → misc}/astype.py +0 -0
  199. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  200. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  201. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  202. /maxframe/tensor/{base → misc}/where.py +0 -0
  203. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -15,7 +15,7 @@
  import logging
  from collections import OrderedDict
 
- from .... import opcodes as OperandDef
+ from .... import opcodes
  from ....core import OutputType
  from ....core.operator.base import Operator
  from ....core.operator.core import TileableOperatorMixin
@@ -29,6 +29,7 @@ from ....serialization.serializables import (
      KeyField,
      ListField,
  )
+ from .core import Booster
  from .dmatrix import ToDMatrix, to_dmatrix
 
  logger = logging.getLogger(__name__)
@@ -41,7 +42,7 @@ def _on_serialize_evals(evals_val):
 
 
  class XGBTrain(Operator, TileableOperatorMixin):
-     _op_type_ = OperandDef.XGBOOST_TRAIN
+     _op_type_ = opcodes.XGBOOST_TRAIN
 
      params = DictField("params", key_type=FieldTypes.string, default=None)
      dtrain = KeyField("dtrain", default=None)
@@ -59,49 +60,59 @@ class XGBTrain(Operator, TileableOperatorMixin):
      num_boost_round = Int64Field("num_boost_round", default=10)
      num_class = Int64Field("num_class", default=None)
 
-     # Store evals_result in local to store the remote evals_result
-     evals_result: dict = None
-
      def __init__(self, gpu=None, **kw):
          super().__init__(gpu=gpu, **kw)
          if self.output_types is None:
              self.output_types = [OutputType.object]
+         if self.has_evals_result:
+             self.output_types.append(OutputType.object)
 
      def _set_inputs(self, inputs):
          super()._set_inputs(inputs)
          self.dtrain = self._inputs[0]
          rest = self._inputs[1:]
-         if self.evals is not None:
+         if self.has_evals_result:
              evals_dict = OrderedDict(self.evals)
              new_evals_dict = OrderedDict()
              for new_key, val in zip(rest, evals_dict.values()):
                  new_evals_dict[new_key] = val
              self.evals = list(new_evals_dict.items())
 
-     def __call__(self):
+     def __call__(self, evals_result):
          inputs = [self.dtrain]
-         if self.evals is not None:
+         if self.has_evals_result:
              inputs.extend(e[0] for e in self.evals)
-         return self.new_tileable(inputs)
+         return self.new_tileables(
+             inputs, object_class=Booster, evals_result=evals_result
+         )[0]
+
+     @property
+     def output_limit(self):
+         return 2 if self.has_evals_result else 1
+
+     @property
+     def has_evals_result(self) -> bool:
+         return self.evals
 
 
  def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
      """
-     Train XGBoost model in Mars manner.
+     Train XGBoost model in MaxFrame manner.
 
      Parameters
      ----------
-     Parameters are the same as `xgboost.train`.
+     Parameters are the same as `xgboost.train`. Note that train is an eager-execution
+     API if evals is passed, thus the call will be blocked until training finished.
 
      Returns
      -------
      results: Booster
      """
 
-     evals_result = evals_result or dict()
-     evals = None or ()
-
+     evals_result = evals_result if evals_result is not None else dict()
      processed_evals = []
+     session = kwargs.pop("session", None)
+     run_kwargs = kwargs.pop("run_kwargs", dict())
      if evals:
          for eval_dmatrix, name in evals:
              if not isinstance(name, str):
@@ -110,12 +121,12 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
                  processed_evals.append((eval_dmatrix, name))
              else:
                  processed_evals.append((to_dmatrix(eval_dmatrix), name))
-
-     return XGBTrain(
+     data = XGBTrain(
          params=params,
          dtrain=dtrain,
          evals=processed_evals,
         evals_result=evals_result,
          num_class=num_class,
-         **kwargs
-     )()
+         **kwargs,
+     )(evals_result)
+     return data.execute(session=session, **run_kwargs) if evals else data
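
With `evals` supplied, `train` now executes eagerly so the remote `evals_result` is filled in when the call returns; without it, the call stays lazy. A minimal usage sketch (the DataFrames `df_train`/`df_val` are hypothetical placeholders; import paths follow the module locations shown in this diff):

    from maxframe.learn.contrib.xgboost.dmatrix import to_dmatrix
    from maxframe.learn.contrib.xgboost.train import train

    dtrain = to_dmatrix(df_train)  # df_train, df_val: existing MaxFrame DataFrames
    dval = to_dmatrix(df_val)

    evals_result = dict()
    # Passing evals triggers eager execution: train() calls .execute()
    # internally and blocks until training finishes, so evals_result is
    # populated on return.
    booster = train(
        {"objective": "binary:logistic"},
        dtrain,
        evals=[(dval, "validation")],
        evals_result=evals_result,
    )

    # Without evals, train() returns a lazy Booster to be executed later.
    lazy_booster = train({"objective": "binary:logistic"}, dtrain)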
maxframe/{core/operator/fuse.py → learn/core.py} RENAMED
@@ -12,18 +12,15 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- from ... import opcodes
- from ...serialization.serializables import ReferenceField
- from ..graph import ChunkGraph
- from .base import Operator
+ from ..core.entity.objects import Object, ObjectData
 
 
- class Fuse(Operator):
-     __slots__ = ("_fuse_graph",)
-     _op_type_ = opcodes.FUSE
+ class ModelData(ObjectData):
+     pass
 
-     fuse_graph = ReferenceField("fuse_graph", ChunkGraph)
 
+ class Model(Object):
+     pass
 
- class FuseChunkMixin:
-     __slots__ = ()
+
+ MODEL_TYPE = (Model, ModelData)
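
The rename retires the old client-side Fuse operator and turns this module into a home for generic model wrappers. A minimal sketch of how the new aliases can be used (the helper below is an assumed usage pattern, not code from this diff):

    from maxframe.learn.core import MODEL_TYPE, Model, ModelData

    def is_model_like(obj) -> bool:
        # MODEL_TYPE bundles the wrapper and its data class, so one
        # isinstance check accepts either form.
        return isinstance(obj, MODEL_TYPE)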
maxframe/lib/mmh3.cp38-win32.pyd CHANGED (binary file)
maxframe/lib/mmh3.pyi ADDED
@@ -0,0 +1,43 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Tuple
+
+ def hash(key, seed=0, signed=True) -> int:
+     """
+     Return a 32 bit integer.
+     """
+
+ def hash_from_buffer(key, seed=0, signed=True) -> int:
+     """
+     Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
+     """
+
+ def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
+     """
+     Return a tuple of two 64 bit integers for a string. Optimized for
+     the x64 bit architecture when x64arch=True, otherwise for x86.
+     """
+
+ def hash128(key, seed=0, x64arch=True, signed=False) -> int:
+     """
+     Return a 128 bit long integer. Optimized for the x64 bit architecture
+     when x64arch=True, otherwise for x86.
+     """
+
+ def hash_bytes(key, seed=0, x64arch=True) -> bytes:
+     """
+     Return a 128 bit hash value as bytes for a string. Optimized for the
+     x64 bit architecture when x64arch=True, otherwise for the x86.
+     """
maxframe/lib/sparse/tests/test_sparse.py CHANGED
@@ -55,13 +55,13 @@ def test_sparse_creation():
      s = SparseNDArray(s1_data)
      assert s.ndim == 2
      assert isinstance(s, SparseMatrix)
-     assert_array_equal(s.toarray(), s1_data.A)
-     assert_array_equal(s.todense(), s1_data.A)
+     assert_array_equal(s.toarray(), s1_data.toarray())
+     assert_array_equal(s.todense(), s1_data.toarray())
 
      ss = pickle.loads(pickle.dumps(s))
      assert s == ss
-     assert_array_equal(ss.toarray(), s1_data.A)
-     assert_array_equal(ss.todense(), s1_data.A)
+     assert_array_equal(ss.toarray(), s1_data.toarray())
+     assert_array_equal(ss.todense(), s1_data.toarray())
 
      v = SparseNDArray(v1, shape=(3,))
      assert s.ndim
@@ -331,12 +331,12 @@ def test_sparse_dot():
 
      assert_array_equal(mls.dot(s1, v1_s), s1.dot(v1_data))
      assert_array_equal(mls.dot(s2, v1_s), s2.dot(v1_data))
-     assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.A))
-     assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.A))
+     assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.toarray()))
+     assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.toarray()))
      assert_array_equal(mls.dot(v1_s, v1_s), v1_data.dot(v1_data), almost=True)
      assert_array_equal(mls.dot(v2_s, v2_s), v2_data.dot(v2_data), almost=True)
 
-     assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.A))
+     assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.toarray()))
      assert_array_equal(mls.dot(v1_s, v1_s, sparse=False), v1_data.dot(v1_data))
 
 
@@ -390,7 +390,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(3)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, 3)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -399,7 +399,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(3, wrap=True)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, 3, wrap=True)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -408,7 +408,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal([1, 2, 3])
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, [1, 2, 3])
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -417,7 +417,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal([1, 2, 3], wrap=True)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, [1, 2, 3], wrap=True)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -427,7 +427,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(val)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, val)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -437,7 +437,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(val, wrap=True)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, val, wrap=True)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -447,7 +447,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(val)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, val)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
@@ -457,7 +457,7 @@ def test_sparse_fill_diagonal():
      arr = SparseNDArray(s1)
      arr.fill_diagonal(val, wrap=True)
 
-     expected = s1.copy().A
+     expected = s1.copy().toarray()
      np.fill_diagonal(expected, val, wrap=True)
 
      np.testing.assert_array_equal(arr.toarray(), expected)
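
The mechanical `.A` → `.toarray()` substitution throughout these tests appears to track SciPy's deprecation of the `.A` shortcut on sparse containers. Both spellings produce the same dense ndarray where both are available:

    import numpy as np
    import scipy.sparse as sps

    m = sps.csr_matrix(np.eye(3))
    # .toarray() is the forward-compatible spelling of the old m.A shortcut.
    np.testing.assert_array_equal(m.toarray(), np.eye(3))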
maxframe/lib/wrapped_pickle.py CHANGED
@@ -120,7 +120,8 @@ class _UnpickleSwitch:
              @functools.wraps(func)
              async def wrapped(*args, **kwargs):
                  with _UnpickleSwitch(forbidden=self._forbidden):
-                     return await func(*args, **kwargs)
+                     ret = await func(*args, **kwargs)
+                     return ret
 
          else:
 
maxframe/opcodes.py CHANGED
@@ -392,6 +392,10 @@ PIVOT_TABLE = 744
 
  FUSE = 801
 
+ # LLM
+ DASHSCOPE_TEXT_GENERATION = 810
+ DASHSCOPE_MULTI_MODAL_GENERATION = 811
+
  # table like input for tensor
  TABLE_COO = 1003
  # store tensor as coo format
@@ -532,6 +536,8 @@ STATSMODELS_TRAIN = 3012
  STATSMODELS_PREDICT = 3013
 
  # learn
+ CONNECTED_COMPONENTS = 3100
+
  # checks
  CHECK_NON_NEGATIVE = 3300
  # classifier check targets
@@ -566,6 +572,8 @@ CHOLESKY_FUSE = 999988
 
  # MaxFrame-dedicated functions
  DATAFRAME_RESHUFFLE = 10001
+ FLATMAP = 10002
+ FLATJSON = 10003
 
  # MaxFrame internal operators
  DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
maxframe/protocol.py CHANGED
@@ -15,7 +15,7 @@
  import base64
  import enum
  import uuid
- from typing import Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar
+ from typing import Any, Dict, Generic, List, Optional, Type, TypeVar
 
  import pandas as pd
 
@@ -32,12 +32,12 @@ from .serialization.serializables import (
      EnumField,
      FieldTypes,
      Float64Field,
+     Int32Field,
      ListField,
      ReferenceField,
      Serializable,
      SeriesField,
      StringField,
-     TupleField,
  )
 
  pickling_support.install()
@@ -71,6 +71,9 @@ class DagStatus(enum.Enum):
      CANCELLING = 4
      CANCELLED = 5
 
+     def is_terminated(self):
+         return self in (DagStatus.CANCELLED, DagStatus.SUCCEEDED, DagStatus.FAILED)
+
 
  class DimensionIndex(Serializable):
      is_slice: bool = BoolField("is_slice", default=None)
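
The new `DagStatus.is_terminated` helper folds the three terminal states into a single test. A hedged polling sketch, where `get_dag_info` stands in for whatever client call returns a `DagInfo` (it is not an API defined in this diff):

    import time

    def wait_for_dag(client, dag_id, interval=1.0):
        while True:
            info = client.get_dag_info(dag_id)  # hypothetical client method
            if info.status.is_terminated():     # SUCCEEDED, FAILED or CANCELLED
                return info
            time.sleep(interval)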
@@ -88,19 +91,6 @@ class DataSerializeType(enum.Enum):
      PICKLE = 0
 
 
- class VolumeDataMeta(Serializable):
-     output_type: OutputType = EnumField(
-         "output_type", OutputType, FieldTypes.int8, default=None
-     )
-     serial_type: DataSerializeType = EnumField(
-         "serial_type", DataSerializeType, FieldTypes.int8, default=None
-     )
-     shape: Tuple[int, ...] = TupleField("shape", FieldTypes.int64, default=None)
-     nsplits: Tuple[Tuple[int, ...], ...] = TupleField(
-         "nsplits", FieldTypes.tuple(FieldTypes.tuple(FieldTypes.int64)), default=None
-     )
-
-
  _result_type_to_info_cls: Dict[ResultType, Type["ResultInfo"]] = dict()
 
 
@@ -150,6 +140,9 @@ class ODPSTableResultInfo(ResultInfo):
      partition_specs: Optional[List[str]] = ListField(
          "partition_specs", FieldTypes.string, default=None
      )
+     table_meta: Optional["DataFrameTableMeta"] = ReferenceField(
+         "table_meta", default=None
+     )
 
      def __init__(self, result_type: ResultType = None, **kw):
          result_type = result_type or ResultType.ODPS_TABLE
@@ -160,8 +153,17 @@ class ODPSTableResultInfo(ResultInfo):
          ret["full_table_name"] = self.full_table_name
          if self.partition_specs:
              ret["partition_specs"] = self.partition_specs
+         if self.table_meta:
+             ret["table_meta"] = self.table_meta.to_json()
          return ret
 
+     @classmethod
+     def _json_to_kwargs(cls, serialized: dict) -> dict:
+         kw = super()._json_to_kwargs(serialized)
+         if "table_meta" in kw:
+             kw["table_meta"] = DataFrameTableMeta.from_json(kw["table_meta"])
+         return kw
+
 
  class ODPSVolumeResultInfo(ResultInfo):
      _result_type = ResultType.ODPS_VOLUME
@@ -190,9 +192,9 @@ class ErrorInfo(JsonSerializable):
          "error_tracebacks", FieldTypes.list
      )
      raw_error_source: ErrorSource = EnumField(
-         "raw_error_source", ErrorSource, FieldTypes.int8
+         "raw_error_source", ErrorSource, FieldTypes.int8, default=None
      )
-     raw_error_data: Optional[Exception] = AnyField("raw_error_data")
+     raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
 
      @classmethod
      def from_exception(cls, exc: Exception):
@@ -201,20 +203,29 @@ class ErrorInfo(JsonSerializable):
          return cls(messages, tracebacks, ErrorSource.PYTHON, exc)
 
      def reraise(self):
-         if self.raw_error_source == ErrorSource.PYTHON:
+         if (
+             self.raw_error_source == ErrorSource.PYTHON
+             and self.raw_error_data is not None
+         ):
              raise self.raw_error_data
          raise RemoteException(self.error_messages, self.error_tracebacks, [])
 
      @classmethod
      def from_json(cls, serialized: dict) -> "ErrorInfo":
          kw = serialized.copy()
-         kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
+         if kw.get("raw_error_source") is not None:
+             kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
+         else:
+             kw["raw_error_source"] = None
+
          if kw.get("raw_error_data"):
              bufs = [base64.b64decode(s) for s in kw["raw_error_data"]]
              try:
                  kw["raw_error_data"] = pickle.loads(bufs[0], buffers=bufs[1:])
              except:
-                 kw["raw_error_data"] = None
+                 # both error source and data shall be None to make sure
+                 # RemoteException is raised.
+                 kw["raw_error_source"] = kw["raw_error_data"] = None
          return cls(**kw)
 
      def to_json(self) -> dict:
@@ -227,7 +238,12 @@ class ErrorInfo(JsonSerializable):
          if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
              err_data_bufs = self.raw_error_data.get_buffers()
          elif isinstance(self.raw_error_data, BaseException):
-             err_data_bufs = pickle_buffers(self.raw_error_data)
+             try:
+                 err_data_bufs = pickle_buffers(self.raw_error_data)
+             except:
+                 err_data_bufs = None
+                 ret["raw_error_source"] = None
+
          if err_data_bufs:
              ret["raw_error_data"] = [
                  base64.b64encode(s).decode() for s in err_data_bufs
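
Taken together, the ErrorInfo changes make error transport fail-soft: if the original exception cannot be pickled on either side, both `raw_error_source` and `raw_error_data` end up None, and `reraise()` falls back to a generic `RemoteException` instead of crashing. A round-trip sketch of the happy path (illustrative only):

    from maxframe.protocol import ErrorInfo

    try:
        raise ValueError("boom")
    except ValueError as exc:
        info = ErrorInfo.from_exception(exc)

    # to_json() base64-encodes the pickled exception; from_json() restores it.
    restored = ErrorInfo.from_json(info.to_json())
    try:
        restored.reraise()  # re-raises the original ValueError
    except ValueError as err:
        print("restored:", err)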
@@ -249,9 +265,17 @@ class DagInfo(JsonSerializable):
      error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
      start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
      end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
+     subdag_infos: Dict[str, "SubDagInfo"] = DictField(
+         "subdag_infos",
+         key_type=FieldTypes.string,
+         value_type=FieldTypes.reference,
+         default_factory=dict,
+     )
 
      @classmethod
-     def from_json(cls, serialized: dict) -> "DagInfo":
+     def from_json(cls, serialized: dict) -> Optional["DagInfo"]:
+         if serialized is None:
+             return None
          kw = serialized.copy()
          kw["status"] = DagStatus(kw["status"])
          if kw.get("tileable_to_result_infos"):
@@ -261,6 +285,10 @@ class DagInfo(JsonSerializable):
              }
          if kw.get("error_info"):
              kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
+         if kw.get("subdag_infos"):
+             kw["subdag_infos"] = {
+                 k: SubDagInfo.from_json(v) for k, v in kw["subdag_infos"].items()
+             }
          return DagInfo(**kw)
 
      def to_json(self) -> dict:
@@ -279,6 +307,8 @@ class DagInfo(JsonSerializable):
          }
          if self.error_info:
              ret["error_info"] = self.error_info.to_json()
+         if self.subdag_infos:
+             ret["subdag_infos"] = {k: v.to_json() for k, v in self.subdag_infos.items()}
          return ret
 
 
@@ -302,7 +332,9 @@ class SessionInfo(JsonSerializable):
      error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
 
      @classmethod
-     def from_json(cls, serialized: dict) -> "SessionInfo":
+     def from_json(cls, serialized: dict) -> Optional["SessionInfo"]:
+         if serialized is None:
+             return None
          kw = serialized.copy()
          if kw.get("dag_infos"):
              kw["dag_infos"] = {
@@ -320,7 +352,10 @@ class SessionInfo(JsonSerializable):
              "idle_timestamp": self.idle_timestamp,
          }
          if self.dag_infos:
-             ret["dag_infos"] = {k: v.to_json() for k, v in self.dag_infos.items()}
+             ret["dag_infos"] = {
+                 k: v.to_json() if v is not None else None
+                 for k, v in self.dag_infos.items()
+             }
          if self.error_info:
              ret["error_info"] = self.error_info.to_json()
          return ret
@@ -340,9 +375,32 @@ class ExecuteDagRequest(Serializable):
          value_type=FieldTypes.reference,
          default=None,
      )
+     new_settings: Dict[str, Any] = DictField(
+         "new_settings",
+         key_type=FieldTypes.string,
+         default=None,
+     )
+
+
+ class SubDagSubmitInstanceInfo(JsonSerializable):
+     submit_reason: str = StringField("submit_reason")
+     instance_id: str = StringField("instance_id")
+     subquery_id: Optional[int] = Int32Field("subquery_id", default=None)
+
+     @classmethod
+     def from_json(cls, serialized: dict) -> "SubDagSubmitInstanceInfo":
+         return SubDagSubmitInstanceInfo(**serialized)
+
+     def to_json(self) -> dict:
+         ret = {
+             "submit_reason": self.submit_reason,
+             "instance_id": self.instance_id,
+             "subquery_id": self.subquery_id,
+         }
+         return ret
 
 
- class SubDagInfo(Serializable):
+ class SubDagInfo(JsonSerializable):
      subdag_id: str = StringField("subdag_id")
      status: DagStatus = EnumField("status", DagStatus, FieldTypes.int8, default=None)
      progress: float = Float64Field("progress", default=None)
@@ -355,9 +413,52 @@ class SubDagInfo(Serializable):
          FieldTypes.reference,
          default_factory=dict,
      )
+     start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
+     end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
+     submit_instances: List[SubDagSubmitInstanceInfo] = ListField(
+         "submit_instances",
+         FieldTypes.reference,
+         default_factory=list,
+     )
+
+     @classmethod
+     def from_json(cls, serialized: dict) -> "SubDagInfo":
+         kw = serialized.copy()
+         kw["status"] = DagStatus(kw["status"])
+         if kw.get("tileable_to_result_infos"):
+             kw["tileable_to_result_infos"] = {
+                 k: ResultInfo.from_json(s)
+                 for k, s in kw["tileable_to_result_infos"].items()
+             }
+         if kw.get("error_info"):
+             kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
+         if kw.get("submit_instances"):
+             kw["submit_instances"] = [
+                 SubDagSubmitInstanceInfo.from_json(s) for s in kw["submit_instances"]
+             ]
+         return SubDagInfo(**kw)
+
+     def to_json(self) -> dict:
+         ret = {
+             "subdag_id": self.subdag_id,
+             "status": self.status.value,
+             "progress": self.progress,
+             "start_timestamp": self.start_timestamp,
+             "end_timestamp": self.end_timestamp,
+         }
+         if self.error_info:
+             ret["error_info"] = self.error_info.to_json()
+         if self.tileable_to_result_infos:
+             ret["tileable_to_result_infos"] = {
+                 k: v.to_json() for k, v in self.tileable_to_result_infos.items()
+             }
+         if self.submit_instances:
+             ret["submit_instances"] = [i.to_json() for i in self.submit_instances]
+         return ret
 
 
  class ExecuteSubDagRequest(Serializable):
+     subdag_id: str = StringField("subdag_id")
      dag: TileableGraph = ReferenceField(
          "dag",
          on_serialize=SerializableGraph.from_graph,
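
SubDagInfo is now JSON-serializable so per-subdag progress and submit instances can travel inside DagInfo payloads. A round-trip sketch (field values are illustrative; constructing by keyword mirrors what from_json itself does):

    from maxframe.protocol import DagStatus, SubDagInfo

    sub = SubDagInfo(subdag_id="subdag-1", status=DagStatus.SUCCEEDED, progress=1.0)
    restored = SubDagInfo.from_json(sub.to_json())
    assert restored.subdag_id == "subdag-1"
    assert restored.status is DagStatus.SUCCEEDED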
@@ -371,7 +472,7 @@ class DecrefRequest(Serializable):
      keys: List[str] = ListField("keys", FieldTypes.string, default=None)
 
 
- class DataFrameTableMeta(Serializable):
+ class DataFrameTableMeta(JsonSerializable):
      __slots__ = "_pd_column_names", "_pd_index_level_names"
 
      table_name: Optional[str] = StringField("table_name", default=None)
@@ -402,7 +503,7 @@ class DataFrameTableMeta(Serializable):
          self._pd_index_level_names = self.pd_index_dtypes.index.tolist()
          return self._pd_index_level_names
 
-     def __eq__(self, other: "Serializable") -> bool:
+     def __eq__(self, other: "DataFrameTableMeta") -> bool:
          if not isinstance(other, type(self)):
              return False
          for k in self._FIELDS:
@@ -413,3 +514,29 @@ class DataFrameTableMeta(Serializable):
              if not is_same:
                  return False
          return True
+
+     def to_json(self) -> dict:
+         b64_pk = lambda x: base64.b64encode(pickle.dumps(x)).decode()
+         ret = {
+             "table_name": self.table_name,
+             "type": self.type.value,
+             "table_column_names": self.table_column_names,
+             "table_index_column_names": self.table_index_column_names,
+             "pd_column_dtypes": b64_pk(self.pd_column_dtypes),
+             "pd_column_level_names": b64_pk(self.pd_column_level_names),
+             "pd_index_dtypes": b64_pk(self.pd_index_dtypes),
+         }
+         return ret
+
+     @classmethod
+     def from_json(cls, serialized: dict) -> "DataFrameTableMeta":
+         b64_upk = lambda x: pickle.loads(base64.b64decode(x))
+         serialized.update(
+             {
+                 "type": OutputType(serialized["type"]),
+                 "pd_column_dtypes": b64_upk(serialized["pd_column_dtypes"]),
+                 "pd_column_level_names": b64_upk(serialized["pd_column_level_names"]),
+                 "pd_index_dtypes": b64_upk(serialized["pd_index_dtypes"]),
+             }
+         )
+         return DataFrameTableMeta(**serialized)
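
Pandas dtype objects are not JSON-native, so `to_json` pickles and base64-encodes them while plain string/list fields pass through untouched; `from_json` reverses both steps. A round-trip sketch (all field values are illustrative assumptions, and constructing by keyword mirrors what `from_json` itself does):

    import numpy as np
    import pandas as pd

    from maxframe.core import OutputType
    from maxframe.protocol import DataFrameTableMeta

    meta = DataFrameTableMeta(
        table_name="tmp_mf_result",  # hypothetical table name
        type=OutputType.dataframe,
        table_column_names=["a", "b"],
        table_index_column_names=["index_0"],
        pd_column_dtypes=pd.Series(
            [np.dtype(int), np.dtype(float)], index=["a", "b"]
        ),
        pd_column_level_names=[None],
        pd_index_dtypes=pd.Series([np.dtype(int)], index=[None]),
    )
    # Dtypes survive the pickle/base64 round trip intact.
    assert DataFrameTableMeta.from_json(meta.to_json()) == meta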