maxframe 0.1.0b5__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (203)
  1. maxframe/_utils.cp39-win32.pyd +0 -0
  2. maxframe/codegen.py +10 -4
  3. maxframe/config/config.py +68 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +58 -14
  6. maxframe/core/__init__.py +2 -16
  7. maxframe/core/entity/__init__.py +1 -12
  8. maxframe/core/entity/executable.py +1 -1
  9. maxframe/core/entity/objects.py +46 -45
  10. maxframe/core/entity/output_types.py +0 -3
  11. maxframe/core/entity/tests/test_objects.py +43 -0
  12. maxframe/core/entity/tileables.py +5 -78
  13. maxframe/core/graph/__init__.py +2 -2
  14. maxframe/core/graph/builder/__init__.py +0 -1
  15. maxframe/core/graph/builder/base.py +5 -4
  16. maxframe/core/graph/builder/tileable.py +4 -4
  17. maxframe/core/graph/builder/utils.py +4 -8
  18. maxframe/core/graph/core.cp39-win32.pyd +0 -0
  19. maxframe/core/graph/core.pyx +4 -4
  20. maxframe/core/graph/entity.py +9 -33
  21. maxframe/core/operator/__init__.py +2 -9
  22. maxframe/core/operator/base.py +3 -5
  23. maxframe/core/operator/objects.py +0 -9
  24. maxframe/core/operator/utils.py +55 -0
  25. maxframe/dataframe/__init__.py +1 -1
  26. maxframe/dataframe/arithmetic/around.py +5 -17
  27. maxframe/dataframe/arithmetic/core.py +15 -7
  28. maxframe/dataframe/arithmetic/docstring.py +7 -33
  29. maxframe/dataframe/arithmetic/equal.py +4 -2
  30. maxframe/dataframe/arithmetic/greater.py +4 -2
  31. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  32. maxframe/dataframe/arithmetic/less.py +2 -2
  33. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  34. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  36. maxframe/dataframe/core.py +31 -7
  37. maxframe/dataframe/datasource/date_range.py +2 -2
  38. maxframe/dataframe/datasource/read_odps_query.py +117 -23
  39. maxframe/dataframe/datasource/read_odps_table.py +6 -3
  40. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  41. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  42. maxframe/dataframe/datastore/to_odps.py +28 -0
  43. maxframe/dataframe/extensions/__init__.py +5 -0
  44. maxframe/dataframe/extensions/flatjson.py +131 -0
  45. maxframe/dataframe/extensions/flatmap.py +317 -0
  46. maxframe/dataframe/extensions/reshuffle.py +1 -1
  47. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  48. maxframe/dataframe/groupby/core.py +1 -1
  49. maxframe/dataframe/groupby/cum.py +0 -1
  50. maxframe/dataframe/groupby/fill.py +4 -1
  51. maxframe/dataframe/groupby/getitem.py +6 -0
  52. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  53. maxframe/dataframe/groupby/transform.py +5 -1
  54. maxframe/dataframe/indexing/align.py +1 -1
  55. maxframe/dataframe/indexing/loc.py +6 -4
  56. maxframe/dataframe/indexing/rename.py +5 -28
  57. maxframe/dataframe/indexing/sample.py +0 -1
  58. maxframe/dataframe/indexing/set_index.py +68 -1
  59. maxframe/dataframe/initializer.py +11 -1
  60. maxframe/dataframe/merge/__init__.py +9 -1
  61. maxframe/dataframe/merge/concat.py +41 -31
  62. maxframe/dataframe/merge/merge.py +237 -3
  63. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  64. maxframe/dataframe/misc/apply.py +5 -10
  65. maxframe/dataframe/misc/case_when.py +1 -1
  66. maxframe/dataframe/misc/describe.py +2 -2
  67. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  68. maxframe/dataframe/misc/eval.py +4 -0
  69. maxframe/dataframe/misc/memory_usage.py +2 -2
  70. maxframe/dataframe/misc/pct_change.py +1 -83
  71. maxframe/dataframe/misc/tests/test_misc.py +33 -2
  72. maxframe/dataframe/misc/transform.py +1 -30
  73. maxframe/dataframe/misc/value_counts.py +4 -17
  74. maxframe/dataframe/missing/dropna.py +1 -1
  75. maxframe/dataframe/missing/fillna.py +5 -5
  76. maxframe/dataframe/operators.py +1 -17
  77. maxframe/dataframe/reduction/core.py +2 -2
  78. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  79. maxframe/dataframe/sort/sort_values.py +1 -11
  80. maxframe/dataframe/statistics/corr.py +3 -3
  81. maxframe/dataframe/statistics/quantile.py +13 -19
  82. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  83. maxframe/dataframe/tests/test_initializer.py +33 -2
  84. maxframe/dataframe/utils.py +26 -11
  85. maxframe/dataframe/window/expanding.py +5 -3
  86. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  87. maxframe/errors.py +13 -0
  88. maxframe/extension.py +12 -0
  89. maxframe/io/__init__.py +13 -0
  90. maxframe/io/objects/__init__.py +24 -0
  91. maxframe/io/objects/core.py +140 -0
  92. maxframe/io/objects/tensor.py +76 -0
  93. maxframe/io/objects/tests/__init__.py +13 -0
  94. maxframe/io/objects/tests/test_object_io.py +97 -0
  95. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  96. maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
  97. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  98. maxframe/io/odpsio/tableio.py +719 -0
  99. maxframe/io/odpsio/tests/__init__.py +13 -0
  100. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
  101. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  102. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  103. maxframe/io/odpsio/volumeio.py +63 -0
  104. maxframe/learn/contrib/__init__.py +3 -1
  105. maxframe/learn/contrib/graph/__init__.py +15 -0
  106. maxframe/learn/contrib/graph/connected_components.py +215 -0
  107. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  108. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  109. maxframe/learn/contrib/llm/__init__.py +16 -0
  110. maxframe/learn/contrib/llm/core.py +54 -0
  111. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  112. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  113. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  114. maxframe/learn/contrib/llm/text.py +42 -0
  115. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  116. maxframe/learn/contrib/xgboost/core.py +87 -2
  117. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  118. maxframe/learn/contrib/xgboost/predict.py +29 -46
  119. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  120. maxframe/learn/contrib/xgboost/train.py +29 -18
  121. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  122. maxframe/lib/mmh3.cp39-win32.pyd +0 -0
  123. maxframe/lib/mmh3.pyi +43 -0
  124. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  125. maxframe/lib/wrapped_pickle.py +2 -1
  126. maxframe/opcodes.py +8 -0
  127. maxframe/protocol.py +154 -27
  128. maxframe/remote/core.py +4 -8
  129. maxframe/serialization/__init__.py +1 -0
  130. maxframe/serialization/core.cp39-win32.pyd +0 -0
  131. maxframe/serialization/core.pxd +3 -0
  132. maxframe/serialization/core.pyi +3 -0
  133. maxframe/serialization/core.pyx +67 -26
  134. maxframe/serialization/exception.py +1 -1
  135. maxframe/serialization/pandas.py +52 -17
  136. maxframe/serialization/serializables/core.py +180 -15
  137. maxframe/serialization/serializables/field_type.py +4 -1
  138. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  139. maxframe/serialization/tests/test_serial.py +2 -1
  140. maxframe/session.py +9 -2
  141. maxframe/tensor/__init__.py +81 -2
  142. maxframe/tensor/arithmetic/isclose.py +1 -0
  143. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  144. maxframe/tensor/core.py +5 -136
  145. maxframe/tensor/datasource/array.py +3 -0
  146. maxframe/tensor/datasource/full.py +1 -1
  147. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  148. maxframe/tensor/indexing/flatnonzero.py +1 -1
  149. maxframe/tensor/indexing/getitem.py +2 -0
  150. maxframe/tensor/merge/__init__.py +2 -0
  151. maxframe/tensor/merge/concatenate.py +101 -0
  152. maxframe/tensor/merge/tests/test_merge.py +30 -1
  153. maxframe/tensor/merge/vstack.py +74 -0
  154. maxframe/tensor/{base → misc}/__init__.py +2 -0
  155. maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
  156. maxframe/tensor/misc/atleast_2d.py +70 -0
  157. maxframe/tensor/misc/atleast_3d.py +85 -0
  158. maxframe/tensor/misc/tests/__init__.py +13 -0
  159. maxframe/tensor/{base → misc}/transpose.py +22 -18
  160. maxframe/tensor/{base → misc}/unique.py +3 -3
  161. maxframe/tensor/operators.py +1 -7
  162. maxframe/tensor/random/core.py +1 -1
  163. maxframe/tensor/reduction/count_nonzero.py +2 -1
  164. maxframe/tensor/reduction/mean.py +1 -0
  165. maxframe/tensor/reduction/nanmean.py +1 -0
  166. maxframe/tensor/reduction/nanvar.py +2 -0
  167. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  168. maxframe/tensor/reduction/var.py +2 -0
  169. maxframe/tensor/statistics/quantile.py +2 -2
  170. maxframe/tensor/utils.py +2 -22
  171. maxframe/tests/test_protocol.py +34 -0
  172. maxframe/tests/test_utils.py +0 -12
  173. maxframe/tests/utils.py +17 -2
  174. maxframe/typing_.py +4 -1
  175. maxframe/udf.py +8 -9
  176. maxframe/utils.py +106 -86
  177. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  178. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
  179. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  180. maxframe_client/__init__.py +0 -1
  181. maxframe_client/clients/framedriver.py +4 -1
  182. maxframe_client/fetcher.py +81 -74
  183. maxframe_client/session/consts.py +3 -0
  184. maxframe_client/session/graph.py +8 -2
  185. maxframe_client/session/odps.py +194 -40
  186. maxframe_client/session/task.py +94 -39
  187. maxframe_client/tests/test_fetcher.py +21 -3
  188. maxframe_client/tests/test_session.py +109 -8
  189. maxframe/core/entity/chunks.py +0 -68
  190. maxframe/core/entity/fuse.py +0 -73
  191. maxframe/core/graph/builder/chunk.py +0 -430
  192. maxframe/odpsio/tableio.py +0 -322
  193. maxframe/odpsio/volumeio.py +0 -95
  194. maxframe_client/clients/spe.py +0 -104
  195. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  196. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  197. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  198. /maxframe/tensor/{base → misc}/astype.py +0 -0
  199. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  200. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  201. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  202. /maxframe/tensor/{base → misc}/where.py +0 -0
  203. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -14,28 +14,44 @@
14
14
 
15
15
  import abc
16
16
  import asyncio
17
+ import copy
17
18
  import logging
18
19
  import time
19
20
  import weakref
20
21
  from numbers import Integral
21
- from typing import Dict, List, Mapping, Optional, Tuple, Union
22
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
22
23
  from urllib.parse import urlparse
23
24
 
24
25
  import numpy as np
25
26
  import pandas as pd
26
27
  from odps import ODPS
28
+ from odps import options as odps_options
29
+ from odps.console import in_ipython_frontend
27
30
 
28
31
  from maxframe.config import options
29
- from maxframe.core import Entity, TileableGraph, enter_mode
32
+ from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
33
+ from maxframe.core.operator import Fetch
30
34
  from maxframe.dataframe import read_odps_table
31
35
  from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
32
36
  from maxframe.dataframe.datasource import PandasDataSourceOperator
33
37
  from maxframe.dataframe.datasource.read_odps_table import DataFrameReadODPSTable
34
- from maxframe.odpsio import HaloTableIO, pandas_to_arrow, pandas_to_odps_schema
38
+ from maxframe.errors import (
39
+ MaxFrameError,
40
+ NoTaskServerResponseError,
41
+ SessionAlreadyClosedError,
42
+ )
43
+ from maxframe.io.objects import get_object_io_handler
44
+ from maxframe.io.odpsio import (
45
+ ODPSTableIO,
46
+ ODPSVolumeWriter,
47
+ pandas_to_arrow,
48
+ pandas_to_odps_schema,
49
+ )
35
50
  from maxframe.protocol import (
36
51
  DagInfo,
37
52
  DagStatus,
38
53
  ODPSTableResultInfo,
54
+ ODPSVolumeResultInfo,
39
55
  ResultInfo,
40
56
  SessionInfo,
41
57
  )
@@ -46,8 +62,15 @@ from maxframe.session import (
46
62
  Profiling,
47
63
  Progress,
48
64
  )
65
+ from maxframe.tensor.datasource import ArrayDataSource
49
66
  from maxframe.typing_ import TileableType
50
- from maxframe.utils import ToThreadMixin, build_temp_table_name
67
+ from maxframe.utils import (
68
+ ToThreadMixin,
69
+ build_session_volume_name,
70
+ build_temp_table_name,
71
+ str_to_bool,
72
+ sync_pyodps_options,
73
+ )
51
74
 
52
75
  from ..clients.framedriver import FrameDriverClient
53
76
  from ..fetcher import get_fetcher_cls
@@ -58,6 +81,45 @@ logger = logging.getLogger(__name__)
58
81
 
59
82
 
60
83
  class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
84
+ def get_settings_to_upload(self) -> Dict[str, Any]:
85
+ sql_settings = (odps_options.sql.settings or {}).copy()
86
+ sql_settings.update(options.sql.settings or {})
87
+
88
+ quota_name = options.session.quota_name or getattr(
89
+ odps_options, "quota_name", None
90
+ )
91
+ lifecycle = options.session.table_lifecycle or odps_options.lifecycle
92
+ temp_lifecycle = (
93
+ options.session.temp_table_lifecycle or odps_options.temp_lifecycle
94
+ )
95
+
96
+ enable_schema = options.session.enable_schema
97
+ default_schema = options.session.default_schema
98
+ if hasattr(self, "_odps_entry"):
99
+ default_schema = default_schema or self._odps_entry.schema
100
+
101
+ # use flags in sql settings
102
+ if sql_settings.get("odps.default.schema"):
103
+ default_schema = sql_settings["odps.default.schema"]
104
+ if str_to_bool(
105
+ sql_settings.get("odps.namespace.schema") or "false"
106
+ ) or str_to_bool(
107
+ sql_settings.get("odps.sql.allow.namespace.schema") or "false"
108
+ ):
109
+ enable_schema = True
110
+
111
+ mf_settings = dict(options.to_dict(remote_only=True).items())
112
+ mf_settings["sql.settings"] = sql_settings
113
+ mf_settings["session.table_lifecycle"] = lifecycle
114
+ mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
115
+ mf_settings["session.quota_name"] = quota_name
116
+ if enable_schema is not None:
117
+ mf_settings["session.enable_schema"] = enable_schema
118
+ if options.session.enable_high_availability is None:
119
+ mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
120
+ mf_settings["session.default_schema"] = default_schema or "default"
121
+ return mf_settings
122
+
61
123
  @abc.abstractmethod
62
124
  def create_session(self) -> SessionInfo:
63
125
  raise NotImplementedError
@@ -68,7 +130,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
68
130
 
69
131
  @abc.abstractmethod
70
132
  def submit_dag(
71
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
133
+ self,
134
+ dag: TileableGraph,
135
+ managed_input_infos: Dict[str, ResultInfo],
136
+ new_settings: Dict[str, Any] = None,
72
137
  ) -> DagInfo:
73
138
  raise NotImplementedError
74
139
 
@@ -122,6 +187,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
122
187
  self._tileable_to_infos = weakref.WeakKeyDictionary()
123
188
 
124
189
  self._caller = self._create_caller(odps_entry, address, **kwargs)
190
+ self._last_settings = None
191
+ self._pull_interval = 1 if in_ipython_frontend() else 3
125
192
 
126
193
  @classmethod
127
194
  def _create_caller(
@@ -131,31 +198,32 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
131
198
 
132
199
  async def _init(self, _address: str):
133
200
  session_info = await self.ensure_async_call(self._caller.create_session)
201
+ self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
134
202
  self._session_id = session_info.session_id
135
203
  await self._show_logview_address()
136
204
 
137
- def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
138
- if (
139
- not isinstance(t.op, PandasDataSourceOperator)
140
- or t.op.get_data() is None
141
- or t.inputs
142
- ):
143
- return None
144
-
145
- schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
205
+ def _upload_and_get_table_read_tileable(
206
+ self, t: TileableType
207
+ ) -> Optional[TileableType]:
208
+ table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
146
209
  if self._odps_entry.exist_table(table_meta.table_name):
147
- self._odps_entry.delete_table(table_meta.table_name)
210
+ self._odps_entry.delete_table(
211
+ table_meta.table_name, hints=options.sql.settings
212
+ )
148
213
  table_name = build_temp_table_name(self.session_id, t.key)
149
214
  table_obj = self._odps_entry.create_table(
150
- table_name, schema, lifecycle=options.session.temp_table_lifecycle
215
+ table_name,
216
+ table_schema,
217
+ lifecycle=options.session.temp_table_lifecycle,
218
+ hints=options.sql.settings,
151
219
  )
152
220
 
153
221
  data = t.op.get_data()
154
222
  batch_size = options.session.upload_batch_size
155
223
 
156
224
  if len(data):
157
- halo_client = HaloTableIO(self._odps_entry)
158
- with halo_client.open_writer(table_obj.full_table_name) as writer:
225
+ table_client = ODPSTableIO(self._odps_entry)
226
+ with table_client.open_writer(table_obj.full_table_name) as writer:
159
227
  for batch_start in range(0, len(data), batch_size):
160
228
  if isinstance(data, pd.Index):
161
229
  batch = data[batch_start : batch_start + batch_size]
@@ -178,13 +246,35 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
178
246
  read_tileable.name = t.name
179
247
  else: # INDEX_TYPE
180
248
  if list(read_tileable.names) != list(t.names):
181
- read_tileable.names = t.names
249
+ read_tileable.rename(t.names, inplace=True)
182
250
  read_tileable._key = t.key
183
251
  read_tileable.params = t.params
184
252
  return read_tileable.data
185
253
 
254
+ def _upload_and_get_vol_read_tileable(
255
+ self, t: TileableType
256
+ ) -> Optional[TileableType]:
257
+ vol_name = build_session_volume_name(self.session_id)
258
+ writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
259
+ io_handler = get_object_io_handler(t)
260
+ io_handler().write_object(writer, t, t.op.data)
261
+ return build_fetch(t).data
262
+
263
+ def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
264
+ if (
265
+ not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
266
+ or t.op.get_data() is None
267
+ or t.inputs
268
+ ):
269
+ return None
270
+ with sync_pyodps_options():
271
+ if isinstance(t.op, PandasDataSourceOperator):
272
+ return self._upload_and_get_table_read_tileable(t)
273
+ else:
274
+ return self._upload_and_get_vol_read_tileable(t)
275
+
186
276
  @enter_mode(kernel=True, build=True)
187
- def _scan_and_replace_pandas_sources(
277
+ def _scan_and_replace_local_sources(
188
278
  self, graph: TileableGraph
189
279
  ) -> Dict[TileableType, TileableType]:
190
280
  """Replaces Pandas data sources with temp table sources in the graph"""
@@ -205,7 +295,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
205
295
 
206
296
  for succ in successors:
207
297
  graph.add_edge(replaced, succ)
208
- succ.inputs = [replacements.get(t, t) for t in succ.inputs]
298
+ succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
209
299
 
210
300
  graph.results = [replacements.get(t, t) for t in graph.results]
211
301
  return replacements
@@ -213,16 +303,41 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
213
303
  @enter_mode(kernel=True, build=True)
214
304
  def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
215
305
  """Generate ResultInfo structs from generated temp tables"""
306
+ vol_name = build_session_volume_name(self.session_id)
307
+
216
308
  infos = dict()
217
309
  for t in tileables:
218
310
  key = t.key
219
- if not isinstance(t.op, DataFrameReadODPSTable):
220
- if not isinstance(t.inputs[0].op, DataFrameReadODPSTable):
221
- continue
222
- t = t.inputs[0]
223
- infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
311
+ if isinstance(t.op, DataFrameReadODPSTable):
312
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
313
+ else:
314
+ if isinstance(t.op, Fetch):
315
+ infos[key] = ODPSVolumeResultInfo(
316
+ volume_name=vol_name, volume_path=t.key
317
+ )
318
+ elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
319
+ t = t.inputs[0]
320
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
224
321
  return infos
225
322
 
323
+ def _get_diff_settings(self) -> Dict[str, Any]:
324
+ new_settings = self._caller.get_settings_to_upload()
325
+ if not self._last_settings: # pragma: no cover
326
+ self._last_settings = copy.deepcopy(new_settings)
327
+ return new_settings
328
+
329
+ update = dict()
330
+ for k in new_settings.keys():
331
+ old_item = self._last_settings.get(k)
332
+ new_item = new_settings.get(k)
333
+ try:
334
+ if old_item != new_item:
335
+ update[k] = new_item
336
+ except: # noqa: E722 # nosec # pylint: disable=bare-except
337
+ update[k] = new_item
338
+ self._last_settings = copy.deepcopy(new_settings)
339
+ return update
340
+
226
341
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
227
342
  tileables = [
228
343
  tileable.data if isinstance(tileable, Entity) else tileable
@@ -232,7 +347,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
232
347
  tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
233
348
  self, tileables, tileable_to_copied
234
349
  )
235
- source_replacements = self._scan_and_replace_pandas_sources(tileable_graph)
350
+ source_replacements = self._scan_and_replace_local_sources(tileable_graph)
236
351
 
237
352
  # we need to manage uploaded data sources with refcounting mechanism
238
353
  # as nodes in tileable_graph are copied, we need to use original nodes
@@ -242,7 +357,10 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
242
357
 
243
358
  replaced_infos = self._get_input_infos(list(source_replacements.values()))
244
359
  dag_info = await self.ensure_async_call(
245
- self._caller.submit_dag, tileable_graph, replaced_infos
360
+ self._caller.submit_dag,
361
+ tileable_graph,
362
+ replaced_infos,
363
+ self._get_diff_settings(),
246
364
  )
247
365
 
248
366
  await self._show_logview_address(dag_info.dag_id)
@@ -264,25 +382,55 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
264
382
  self, dag_info: DagInfo, tileables: List, progress: Progress
265
383
  ):
266
384
  start_time = time.time()
385
+ session_id = dag_info.session_id
267
386
  dag_id = dag_info.dag_id
268
- wait_timeout = 10
387
+ server_no_response_time = None
269
388
  with enter_mode(build=True, kernel=True):
270
389
  key_to_tileables = {t.key: t for t in tileables}
271
-
390
+ timeout_val = 0.1
272
391
  try:
273
392
  while True:
274
393
  elapsed_time = time.time() - start_time
394
+ next_timeout_val = min(timeout_val * 2, self._pull_interval)
275
395
  timeout_val = (
276
- min(self.timeout - elapsed_time, wait_timeout)
396
+ min(self.timeout - elapsed_time, next_timeout_val)
277
397
  if self.timeout
278
- else wait_timeout
398
+ else next_timeout_val
279
399
  )
280
400
  if timeout_val <= 0:
281
401
  raise TimeoutError("Running DAG timed out")
282
402
 
283
- dag_info: DagInfo = await self.ensure_async_call(
284
- self._caller.get_dag_info, dag_id
285
- )
403
+ try:
404
+ dag_info: DagInfo = await self.ensure_async_call(
405
+ self._caller.get_dag_info, dag_id
406
+ )
407
+ server_no_response_time = None
408
+ except (NoTaskServerResponseError, SessionAlreadyClosedError) as ex:
409
+ # when we receive SessionAlreadyClosedError after NoTaskServerResponseError
410
+ # is received, it is possible that task server is restarted and
411
+ # SessionAlreadyClosedError might be flaky. Otherwise, the error
412
+ # should be raised.
413
+ if (
414
+ isinstance(ex, SessionAlreadyClosedError)
415
+ and not server_no_response_time
416
+ ):
417
+ raise
418
+ server_no_response_time = server_no_response_time or time.time()
419
+ if (
420
+ time.time() - server_no_response_time
421
+ > options.client.task_restart_timeout
422
+ ):
423
+ raise MaxFrameError(
424
+ "Failed to get valid response from service. "
425
+ f"Session {self._session_id}."
426
+ ) from None
427
+ await asyncio.sleep(timeout_val)
428
+ continue
429
+
430
+ if dag_info is None:
431
+ raise SystemError(
432
+ f"Cannot find DAG with ID {dag_id} in session {session_id}"
433
+ )
286
434
  progress.value = dag_info.progress
287
435
  if dag_info.status != DagStatus.RUNNING:
288
436
  break
@@ -344,7 +492,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
344
492
  data_tileable, indexes = self._get_data_tileable_and_indexes(tileable)
345
493
  info = self._tileable_to_infos[data_tileable]
346
494
  fetcher = get_fetcher_cls(info.result_type)(self._odps_entry)
347
- results.append(await fetcher.fetch(tileable, info, indexes))
495
+ results.append(await fetcher.fetch(data_tileable, info, indexes))
348
496
  return results
349
497
 
350
498
  async def decref(self, *tileable_keys):
@@ -422,7 +570,8 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
422
570
  _client: FrameDriverClient
423
571
  _session_id: Optional[str]
424
572
 
425
- def __init__(self, client: FrameDriverClient):
573
+ def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
574
+ self._odps_entry = odps_entry
426
575
  self._client = client
427
576
  self._session_id = None
428
577
 
@@ -435,9 +584,14 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
435
584
  await self._client.delete_session(self._session_id)
436
585
 
437
586
  async def submit_dag(
438
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
587
+ self,
588
+ dag: TileableGraph,
589
+ managed_input_infos: Dict[str, ResultInfo] = None,
590
+ new_settings: Dict[str, Any] = None,
439
591
  ) -> DagInfo:
440
- return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
592
+ return await self._client.submit_dag(
593
+ self._session_id, dag, managed_input_infos, new_settings=new_settings
594
+ )
441
595
 
442
596
  async def get_dag_info(self, dag_id: str) -> DagInfo:
443
597
  return await self._client.get_dag_info(self._session_id, dag_id)
@@ -475,7 +629,7 @@ class MaxFrameRestSession(MaxFrameSession):
475
629
 
476
630
  @classmethod
477
631
  def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
478
- return MaxFrameRestCaller(FrameDriverClient(address))
632
+ return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
479
633
 
480
634
 
481
635
  def register_session_schemes(overwrite: bool = False):
@@ -16,7 +16,7 @@ import base64
16
16
  import json
17
17
  import logging
18
18
  import time
19
- from typing import Dict, List, Optional, Type, Union
19
+ from typing import Any, Dict, List, Optional, Type, Union
20
20
 
21
21
  import msgpack
22
22
  from odps import ODPS
@@ -24,8 +24,15 @@ from odps import options as odps_options
24
24
  from odps.errors import parse_instance_error
25
25
  from odps.models import Instance, MaxFrameTask
26
26
 
27
+ try:
28
+ from odps.errors import EmptyTaskInfoError
29
+ except ImportError: # pragma: no cover
30
+ # todo remove when pyodps>=0.12.0 is enforced
31
+ EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
32
+
27
33
  from maxframe.config import options
28
34
  from maxframe.core import TileableGraph
35
+ from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
29
36
  from maxframe.protocol import DagInfo, JsonSerializable, ResultInfo, SessionInfo
30
37
  from maxframe.utils import deserialize_serializable, serialize_serializable, to_str
31
38
 
@@ -35,6 +42,7 @@ except ImportError:
35
42
  mf_version = None
36
43
 
37
44
  from .consts import (
45
+ EMPTY_RESPONSE_RETRY_COUNT,
38
46
  MAXFRAME_DEFAULT_PROTOCOL,
39
47
  MAXFRAME_OUTPUT_JSON_FORMAT,
40
48
  MAXFRAME_OUTPUT_MAXFRAME_FORMAT,
@@ -82,6 +90,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
82
90
  self._running_cluster = running_cluster
83
91
  self._major_version = major_version
84
92
  self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
93
+ self._deleted = False
85
94
 
86
95
  if nested_instance_id is None:
87
96
  self._nested = False
@@ -90,14 +99,26 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
90
99
  self._nested = True
91
100
  self._instance = odps_entry.get_instance(nested_instance_id)
92
101
 
102
+ @property
103
+ def instance(self):
104
+ return self._instance
105
+
93
106
  def _deserial_task_info_result(
94
107
  self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
95
108
  ):
96
109
  if isinstance(content, (str, bytes)):
110
+ if len(content) == 0:
111
+ content = "{}"
97
112
  json_data = json.loads(to_str(content))
98
113
  else:
99
114
  json_data = content
100
- result_data = base64.b64decode(json_data["result"])
115
+ encoded_result = json_data.get("result")
116
+ if not encoded_result:
117
+ if self._deleted:
118
+ return None
119
+ else:
120
+ raise SessionAlreadyClosedError(self._instance.id)
121
+ result_data = base64.b64decode(encoded_result)
101
122
  if self._output_format == MAXFRAME_OUTPUT_MAXFRAME_FORMAT:
102
123
  return deserialize_serializable(result_data)
103
124
  elif self._output_format == MAXFRAME_OUTPUT_JSON_FORMAT:
@@ -110,21 +131,9 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
110
131
  )
111
132
 
112
133
  def _create_maxframe_task(self) -> MaxFrameTask:
113
- task = MaxFrameTask(
114
- name=self._task_name,
115
- major_version=self._major_version,
116
- service_endpoint=self._odps_entry.endpoint,
117
- )
118
-
119
- # merge sql options
120
- sql_settings = (odps_options.sql.settings or {}).copy()
121
- sql_settings.update(options.sql.settings or {})
122
-
123
- mf_settings = dict(options.to_dict(remote_only=True).items())
124
- mf_settings["sql.settings"] = sql_settings
125
-
134
+ task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
126
135
  mf_opts = {
127
- "odps.maxframe.settings": json.dumps(mf_settings),
136
+ "odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
128
137
  "odps.maxframe.output_format": self._output_format,
129
138
  }
130
139
  if mf_version:
@@ -178,11 +187,40 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
178
187
  time.sleep(interval)
179
188
  interval = min(max_interval, interval * 2)
180
189
 
190
+ def _put_task_info(self, method_name: str, json_data: dict):
191
+ for trial in range(EMPTY_RESPONSE_RETRY_COUNT):
192
+ try:
193
+ return self._instance.put_task_info(
194
+ self._task_name,
195
+ method_name,
196
+ json.dumps(json_data),
197
+ raise_empty=True,
198
+ )
199
+ except TypeError: # pragma: no cover
200
+ # todo remove when pyodps>=0.12.0 is enforced
201
+ resp_data = self._instance.put_task_info(
202
+ self._task_name, method_name, json.dumps(json_data)
203
+ )
204
+ if resp_data:
205
+ return resp_data
206
+ else:
207
+ raise NoTaskServerResponseError(
208
+ f"No response for request {method_name}. "
209
+ f"Instance ID: {self._instance.id}"
210
+ )
211
+ except EmptyTaskInfoError as ex:
212
+ # retry when server returns HTTP 204, which is designed for retry
213
+ if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
214
+ raise NoTaskServerResponseError(
215
+ f"No response for request {method_name}. "
216
+ f"Instance ID: {self._instance.id}. "
217
+ f"Request ID: {ex.request_id}"
218
+ ) from None
219
+ time.sleep(0.5)
220
+
181
221
  def get_session(self) -> SessionInfo:
182
222
  req_data = {"output_format": self._output_format}
183
- serialized = self._instance.put_task_info(
184
- self._task_name, MAXFRAME_TASK_GET_SESSION_METHOD, json.dumps(req_data)
185
- )
223
+ serialized = self._put_task_info(MAXFRAME_TASK_GET_SESSION_METHOD, req_data)
186
224
  info: SessionInfo = self._deserial_task_info_result(serialized, SessionInfo)
187
225
  info.session_id = self._instance.id
188
226
  return info
@@ -192,28 +230,28 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
192
230
  self._instance.stop()
193
231
  else:
194
232
  req_data = {"output_format": self._output_format}
195
- self._instance.put_task_info(
196
- self._task_name,
197
- MAXFRAME_TASK_DELETE_SESSION_METHOD,
198
- json.dumps(req_data),
199
- )
233
+ self._put_task_info(MAXFRAME_TASK_DELETE_SESSION_METHOD, req_data)
234
+ self._deleted = True
200
235
 
201
236
  def submit_dag(
202
237
  self,
203
238
  dag: TileableGraph,
204
239
  managed_input_infos: Optional[Dict[str, ResultInfo]] = None,
240
+ new_settings: Dict[str, Any] = None,
205
241
  ) -> DagInfo:
242
+ new_settings_value = {
243
+ "odps.maxframe.settings": json.dumps(new_settings),
244
+ }
206
245
  req_data = {
207
246
  "protocol": MAXFRAME_DEFAULT_PROTOCOL,
208
247
  "dag": base64.b64encode(serialize_serializable(dag)).decode(),
209
248
  "managed_input_infos": base64.b64encode(
210
249
  serialize_serializable(managed_input_infos)
211
250
  ).decode(),
251
+ "new_settings": json.dumps(new_settings_value),
212
252
  "output_format": self._output_format,
213
253
  }
214
- res = self._instance.put_task_info(
215
- self._task_name, MAXFRAME_TASK_SUBMIT_DAG_METHOD, json.dumps(req_data)
216
- )
254
+ res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
217
255
  return self._deserial_task_info_result(res, DagInfo)
218
256
 
219
257
  def get_dag_info(self, dag_id: str) -> DagInfo:
@@ -222,9 +260,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
222
260
  "dag_id": dag_id,
223
261
  "output_format": self._output_format,
224
262
  }
225
- res = self._instance.put_task_info(
226
- self._task_name, MAXFRAME_TASK_GET_DAG_INFO_METHOD, json.dumps(req_data)
227
- )
263
+ res = self._put_task_info(MAXFRAME_TASK_GET_DAG_INFO_METHOD, req_data)
228
264
  return self._deserial_task_info_result(res, DagInfo)
229
265
 
230
266
  def cancel_dag(self, dag_id: str) -> DagInfo:
@@ -233,29 +269,39 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
233
269
  "dag_id": dag_id,
234
270
  "output_format": self._output_format,
235
271
  }
236
- res = self._instance.put_task_info(
237
- self._task_name, MAXFRAME_TASK_CANCEL_DAG_METHOD, json.dumps(req_data)
238
- )
272
+ res = self._put_task_info(MAXFRAME_TASK_CANCEL_DAG_METHOD, req_data)
239
273
  return self._deserial_task_info_result(res, DagInfo)
240
274
 
241
275
  def decref(self, tileable_keys: List[str]) -> None:
242
276
  req_data = {
243
277
  "tileable_keys": ",".join(tileable_keys),
244
278
  }
245
- self._instance.put_task_info(
246
- self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
247
- )
279
+ self._put_task_info(MAXFRAME_TASK_DECREF_METHOD, req_data)
248
280
 
249
281
  def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
282
+ """
283
+ Generate logview address
284
+
285
+ Parameters
286
+ ----------
287
+ dag_id: id of dag for which dag logview detail page to access
288
+ hours: hours of the logview address auth limit
289
+ Returns
290
+ -------
291
+ Logview address
292
+ """
250
293
  hours = hours or options.session.logview_hours
251
- subquery_suffix = f"&subQuery={dag_id}" if dag_id else ""
252
- return self._instance.get_logview_address(hours) + subquery_suffix
294
+ # notice: maxframe can't reuse subQuery else will conflict with mcqa when fetch resource data,
295
+ # added dagId for maxframe so logview backend will return maxframe data format if
296
+ # instance and dagId is provided.
297
+ dag_suffix = f"&dagId={dag_id}" if dag_id else ""
298
+ return self._instance.get_logview_address(hours) + dag_suffix
253
299
 
254
300
 
255
301
  class MaxFrameTaskSession(MaxFrameSession):
256
302
  schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
257
303
 
258
- _instance: Instance
304
+ _caller: MaxFrameInstanceCaller
259
305
 
260
306
  @classmethod
261
307
  def _create_caller(
@@ -275,6 +321,15 @@ class MaxFrameTaskSession(MaxFrameSession):
275
321
  **kwargs,
276
322
  )
277
323
 
324
+ @property
325
+ def closed(self) -> bool:
326
+ if super().closed:
327
+ return True
328
+ if not self._caller or not self._caller.instance:
329
+ # session not initialized yet
330
+ return False
331
+ return self._caller.instance.is_terminated()
332
+
278
333
 
279
334
  def register_session_schemes(overwrite: bool = False):
280
335
  MaxFrameTaskSession.register_schemes(overwrite=overwrite)