maxframe 0.1.0b4__cp311-cp311-win32.whl → 1.0.0__cp311-cp311-win32.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (214)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win32.pyd +0 -0
  3. maxframe/codegen.py +56 -5
  4. maxframe/config/config.py +78 -10
  5. maxframe/config/validators.py +42 -11
  6. maxframe/conftest.py +58 -14
  7. maxframe/core/__init__.py +2 -16
  8. maxframe/core/entity/__init__.py +1 -12
  9. maxframe/core/entity/executable.py +1 -1
  10. maxframe/core/entity/objects.py +46 -45
  11. maxframe/core/entity/output_types.py +0 -3
  12. maxframe/core/entity/tests/test_objects.py +43 -0
  13. maxframe/core/entity/tileables.py +5 -78
  14. maxframe/core/graph/__init__.py +2 -2
  15. maxframe/core/graph/builder/__init__.py +0 -1
  16. maxframe/core/graph/builder/base.py +5 -4
  17. maxframe/core/graph/builder/tileable.py +4 -4
  18. maxframe/core/graph/builder/utils.py +4 -8
  19. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  20. maxframe/core/graph/core.pyx +4 -4
  21. maxframe/core/graph/entity.py +9 -33
  22. maxframe/core/operator/__init__.py +2 -9
  23. maxframe/core/operator/base.py +3 -5
  24. maxframe/core/operator/objects.py +0 -9
  25. maxframe/core/operator/utils.py +55 -0
  26. maxframe/dataframe/__init__.py +2 -1
  27. maxframe/dataframe/arithmetic/around.py +5 -17
  28. maxframe/dataframe/arithmetic/core.py +15 -7
  29. maxframe/dataframe/arithmetic/docstring.py +7 -33
  30. maxframe/dataframe/arithmetic/equal.py +4 -2
  31. maxframe/dataframe/arithmetic/greater.py +4 -2
  32. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  33. maxframe/dataframe/arithmetic/less.py +2 -2
  34. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  36. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  37. maxframe/dataframe/core.py +58 -12
  38. maxframe/dataframe/datasource/date_range.py +2 -2
  39. maxframe/dataframe/datasource/read_odps_query.py +120 -24
  40. maxframe/dataframe/datasource/read_odps_table.py +9 -4
  41. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  42. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  43. maxframe/dataframe/datastore/to_odps.py +28 -0
  44. maxframe/dataframe/extensions/__init__.py +5 -0
  45. maxframe/dataframe/extensions/flatjson.py +131 -0
  46. maxframe/dataframe/extensions/flatmap.py +317 -0
  47. maxframe/dataframe/extensions/reshuffle.py +1 -1
  48. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  49. maxframe/dataframe/groupby/core.py +1 -1
  50. maxframe/dataframe/groupby/cum.py +0 -1
  51. maxframe/dataframe/groupby/fill.py +4 -1
  52. maxframe/dataframe/groupby/getitem.py +6 -0
  53. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  54. maxframe/dataframe/groupby/transform.py +5 -1
  55. maxframe/dataframe/indexing/align.py +1 -1
  56. maxframe/dataframe/indexing/loc.py +6 -4
  57. maxframe/dataframe/indexing/rename.py +5 -28
  58. maxframe/dataframe/indexing/sample.py +0 -1
  59. maxframe/dataframe/indexing/set_index.py +68 -1
  60. maxframe/dataframe/initializer.py +11 -1
  61. maxframe/dataframe/merge/__init__.py +9 -1
  62. maxframe/dataframe/merge/concat.py +41 -31
  63. maxframe/dataframe/merge/merge.py +237 -3
  64. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  65. maxframe/dataframe/misc/__init__.py +4 -0
  66. maxframe/dataframe/misc/apply.py +6 -11
  67. maxframe/dataframe/misc/case_when.py +141 -0
  68. maxframe/dataframe/misc/describe.py +2 -2
  69. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  70. maxframe/dataframe/misc/eval.py +4 -0
  71. maxframe/dataframe/misc/memory_usage.py +2 -2
  72. maxframe/dataframe/misc/pct_change.py +1 -83
  73. maxframe/dataframe/misc/pivot_table.py +262 -0
  74. maxframe/dataframe/misc/tests/test_misc.py +93 -1
  75. maxframe/dataframe/misc/transform.py +1 -30
  76. maxframe/dataframe/misc/value_counts.py +4 -17
  77. maxframe/dataframe/missing/dropna.py +1 -1
  78. maxframe/dataframe/missing/fillna.py +5 -5
  79. maxframe/dataframe/operators.py +1 -17
  80. maxframe/dataframe/plotting/core.py +2 -2
  81. maxframe/dataframe/reduction/core.py +4 -3
  82. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  83. maxframe/dataframe/sort/sort_values.py +1 -11
  84. maxframe/dataframe/statistics/corr.py +3 -3
  85. maxframe/dataframe/statistics/quantile.py +13 -19
  86. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  87. maxframe/dataframe/tests/test_initializer.py +33 -2
  88. maxframe/dataframe/utils.py +33 -11
  89. maxframe/dataframe/window/expanding.py +5 -3
  90. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  91. maxframe/errors.py +13 -0
  92. maxframe/extension.py +12 -0
  93. maxframe/io/__init__.py +13 -0
  94. maxframe/io/objects/__init__.py +24 -0
  95. maxframe/io/objects/core.py +140 -0
  96. maxframe/io/objects/tensor.py +76 -0
  97. maxframe/io/objects/tests/__init__.py +13 -0
  98. maxframe/io/objects/tests/test_object_io.py +97 -0
  99. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  100. maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
  101. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  102. maxframe/io/odpsio/tableio.py +719 -0
  103. maxframe/io/odpsio/tests/__init__.py +13 -0
  104. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
  105. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  106. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  107. maxframe/io/odpsio/volumeio.py +63 -0
  108. maxframe/learn/contrib/__init__.py +3 -1
  109. maxframe/learn/contrib/graph/__init__.py +15 -0
  110. maxframe/learn/contrib/graph/connected_components.py +215 -0
  111. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  112. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  113. maxframe/learn/contrib/llm/__init__.py +16 -0
  114. maxframe/learn/contrib/llm/core.py +54 -0
  115. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  116. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  117. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  118. maxframe/learn/contrib/llm/text.py +42 -0
  119. maxframe/learn/contrib/utils.py +52 -0
  120. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  121. maxframe/learn/contrib/xgboost/classifier.py +110 -0
  122. maxframe/learn/contrib/xgboost/core.py +241 -0
  123. maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
  124. maxframe/learn/contrib/xgboost/predict.py +121 -0
  125. maxframe/learn/contrib/xgboost/regressor.py +71 -0
  126. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  127. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  128. maxframe/learn/contrib/xgboost/train.py +132 -0
  129. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  130. maxframe/learn/utils/__init__.py +15 -0
  131. maxframe/learn/utils/core.py +29 -0
  132. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  133. maxframe/lib/mmh3.pyi +43 -0
  134. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  135. maxframe/lib/wrapped_pickle.py +2 -1
  136. maxframe/opcodes.py +11 -0
  137. maxframe/protocol.py +154 -27
  138. maxframe/remote/core.py +4 -8
  139. maxframe/serialization/__init__.py +1 -0
  140. maxframe/serialization/core.cp311-win32.pyd +0 -0
  141. maxframe/serialization/core.pxd +3 -0
  142. maxframe/serialization/core.pyi +64 -0
  143. maxframe/serialization/core.pyx +67 -26
  144. maxframe/serialization/exception.py +1 -1
  145. maxframe/serialization/pandas.py +52 -17
  146. maxframe/serialization/serializables/core.py +180 -15
  147. maxframe/serialization/serializables/field_type.py +4 -1
  148. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  149. maxframe/serialization/tests/test_serial.py +2 -1
  150. maxframe/session.py +37 -2
  151. maxframe/tensor/__init__.py +81 -2
  152. maxframe/tensor/arithmetic/isclose.py +1 -0
  153. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  154. maxframe/tensor/core.py +5 -136
  155. maxframe/tensor/datasource/array.py +7 -2
  156. maxframe/tensor/datasource/full.py +1 -1
  157. maxframe/tensor/datasource/scalar.py +1 -1
  158. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  159. maxframe/tensor/indexing/flatnonzero.py +1 -1
  160. maxframe/tensor/indexing/getitem.py +2 -0
  161. maxframe/tensor/merge/__init__.py +2 -0
  162. maxframe/tensor/merge/concatenate.py +101 -0
  163. maxframe/tensor/merge/tests/test_merge.py +30 -1
  164. maxframe/tensor/merge/vstack.py +74 -0
  165. maxframe/tensor/{base → misc}/__init__.py +4 -0
  166. maxframe/tensor/misc/atleast_1d.py +72 -0
  167. maxframe/tensor/misc/atleast_2d.py +70 -0
  168. maxframe/tensor/misc/atleast_3d.py +85 -0
  169. maxframe/tensor/misc/tests/__init__.py +13 -0
  170. maxframe/tensor/{base → misc}/transpose.py +22 -18
  171. maxframe/tensor/misc/unique.py +205 -0
  172. maxframe/tensor/operators.py +1 -7
  173. maxframe/tensor/random/core.py +1 -1
  174. maxframe/tensor/reduction/count_nonzero.py +2 -1
  175. maxframe/tensor/reduction/mean.py +1 -0
  176. maxframe/tensor/reduction/nanmean.py +1 -0
  177. maxframe/tensor/reduction/nanvar.py +2 -0
  178. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  179. maxframe/tensor/reduction/var.py +2 -0
  180. maxframe/tensor/statistics/quantile.py +2 -2
  181. maxframe/tensor/utils.py +2 -22
  182. maxframe/tests/test_protocol.py +34 -0
  183. maxframe/tests/test_utils.py +0 -12
  184. maxframe/tests/utils.py +17 -2
  185. maxframe/typing_.py +4 -1
  186. maxframe/udf.py +62 -3
  187. maxframe/utils.py +112 -86
  188. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  189. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
  190. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  191. maxframe_client/__init__.py +0 -1
  192. maxframe_client/clients/framedriver.py +4 -1
  193. maxframe_client/fetcher.py +123 -54
  194. maxframe_client/session/consts.py +3 -0
  195. maxframe_client/session/graph.py +8 -2
  196. maxframe_client/session/odps.py +223 -40
  197. maxframe_client/session/task.py +108 -80
  198. maxframe_client/tests/test_fetcher.py +21 -3
  199. maxframe_client/tests/test_session.py +136 -8
  200. maxframe/core/entity/chunks.py +0 -68
  201. maxframe/core/entity/fuse.py +0 -73
  202. maxframe/core/graph/builder/chunk.py +0 -430
  203. maxframe/odpsio/tableio.py +0 -300
  204. maxframe/odpsio/volumeio.py +0 -95
  205. maxframe_client/clients/spe.py +0 -104
  206. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  207. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  208. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  209. /maxframe/tensor/{base → misc}/astype.py +0 -0
  210. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  211. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  212. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  213. /maxframe/tensor/{base → misc}/where.py +0 -0
  214. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -14,28 +14,44 @@
14
14
 
15
15
  import abc
16
16
  import asyncio
17
+ import copy
17
18
  import logging
18
19
  import time
19
20
  import weakref
20
21
  from numbers import Integral
21
- from typing import Dict, List, Mapping, Optional, Tuple, Union
22
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
22
23
  from urllib.parse import urlparse
23
24
 
24
25
  import numpy as np
25
26
  import pandas as pd
26
27
  from odps import ODPS
28
+ from odps import options as odps_options
29
+ from odps.console import in_ipython_frontend
27
30
 
28
31
  from maxframe.config import options
29
- from maxframe.core import Entity, TileableGraph, enter_mode
32
+ from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
33
+ from maxframe.core.operator import Fetch
30
34
  from maxframe.dataframe import read_odps_table
31
35
  from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
32
36
  from maxframe.dataframe.datasource import PandasDataSourceOperator
33
37
  from maxframe.dataframe.datasource.read_odps_table import DataFrameReadODPSTable
34
- from maxframe.odpsio import HaloTableIO, pandas_to_arrow, pandas_to_odps_schema
38
+ from maxframe.errors import (
39
+ MaxFrameError,
40
+ NoTaskServerResponseError,
41
+ SessionAlreadyClosedError,
42
+ )
43
+ from maxframe.io.objects import get_object_io_handler
44
+ from maxframe.io.odpsio import (
45
+ ODPSTableIO,
46
+ ODPSVolumeWriter,
47
+ pandas_to_arrow,
48
+ pandas_to_odps_schema,
49
+ )
35
50
  from maxframe.protocol import (
36
51
  DagInfo,
37
52
  DagStatus,
38
53
  ODPSTableResultInfo,
54
+ ODPSVolumeResultInfo,
39
55
  ResultInfo,
40
56
  SessionInfo,
41
57
  )
@@ -46,8 +62,15 @@ from maxframe.session import (
46
62
  Profiling,
47
63
  Progress,
48
64
  )
65
+ from maxframe.tensor.datasource import ArrayDataSource
49
66
  from maxframe.typing_ import TileableType
50
- from maxframe.utils import ToThreadMixin, build_temp_table_name
67
+ from maxframe.utils import (
68
+ ToThreadMixin,
69
+ build_session_volume_name,
70
+ build_temp_table_name,
71
+ str_to_bool,
72
+ sync_pyodps_options,
73
+ )
51
74
 
52
75
  from ..clients.framedriver import FrameDriverClient
53
76
  from ..fetcher import get_fetcher_cls
@@ -58,6 +81,45 @@ logger = logging.getLogger(__name__)
58
81
 
59
82
 
60
83
  class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
84
+ def get_settings_to_upload(self) -> Dict[str, Any]:
85
+ sql_settings = (odps_options.sql.settings or {}).copy()
86
+ sql_settings.update(options.sql.settings or {})
87
+
88
+ quota_name = options.session.quota_name or getattr(
89
+ odps_options, "quota_name", None
90
+ )
91
+ lifecycle = options.session.table_lifecycle or odps_options.lifecycle
92
+ temp_lifecycle = (
93
+ options.session.temp_table_lifecycle or odps_options.temp_lifecycle
94
+ )
95
+
96
+ enable_schema = options.session.enable_schema
97
+ default_schema = options.session.default_schema
98
+ if hasattr(self, "_odps_entry"):
99
+ default_schema = default_schema or self._odps_entry.schema
100
+
101
+ # use flags in sql settings
102
+ if sql_settings.get("odps.default.schema"):
103
+ default_schema = sql_settings["odps.default.schema"]
104
+ if str_to_bool(
105
+ sql_settings.get("odps.namespace.schema") or "false"
106
+ ) or str_to_bool(
107
+ sql_settings.get("odps.sql.allow.namespace.schema") or "false"
108
+ ):
109
+ enable_schema = True
110
+
111
+ mf_settings = dict(options.to_dict(remote_only=True).items())
112
+ mf_settings["sql.settings"] = sql_settings
113
+ mf_settings["session.table_lifecycle"] = lifecycle
114
+ mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
115
+ mf_settings["session.quota_name"] = quota_name
116
+ if enable_schema is not None:
117
+ mf_settings["session.enable_schema"] = enable_schema
118
+ if options.session.enable_high_availability is None:
119
+ mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
120
+ mf_settings["session.default_schema"] = default_schema or "default"
121
+ return mf_settings
122
+
61
123
  @abc.abstractmethod
62
124
  def create_session(self) -> SessionInfo:
63
125
  raise NotImplementedError
@@ -68,7 +130,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
68
130
 
69
131
  @abc.abstractmethod
70
132
  def submit_dag(
71
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
133
+ self,
134
+ dag: TileableGraph,
135
+ managed_input_infos: Dict[str, ResultInfo],
136
+ new_settings: Dict[str, Any] = None,
72
137
  ) -> DagInfo:
73
138
  raise NotImplementedError
74
139
 
@@ -84,6 +149,9 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
84
149
  def decref(self, tileable_keys: List[str]) -> None:
85
150
  raise NotImplementedError
86
151
 
152
+ def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
153
+ return None
154
+
87
155
 
88
156
  class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
89
157
  _odps_entry: Optional[ODPS]
@@ -119,6 +187,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
119
187
  self._tileable_to_infos = weakref.WeakKeyDictionary()
120
188
 
121
189
  self._caller = self._create_caller(odps_entry, address, **kwargs)
190
+ self._last_settings = None
191
+ self._pull_interval = 1 if in_ipython_frontend() else 3
122
192
 
123
193
  @classmethod
124
194
  def _create_caller(
@@ -128,28 +198,32 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
128
198
 
129
199
  async def _init(self, _address: str):
130
200
  session_info = await self.ensure_async_call(self._caller.create_session)
201
+ self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
131
202
  self._session_id = session_info.session_id
203
+ await self._show_logview_address()
132
204
 
133
- def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
134
- if (
135
- not isinstance(t.op, PandasDataSourceOperator)
136
- or t.op.get_data() is None
137
- or t.inputs
138
- ):
139
- return None
140
-
141
- schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
205
+ def _upload_and_get_table_read_tileable(
206
+ self, t: TileableType
207
+ ) -> Optional[TileableType]:
208
+ table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
142
209
  if self._odps_entry.exist_table(table_meta.table_name):
143
- self._odps_entry.delete_table(table_meta.table_name)
210
+ self._odps_entry.delete_table(
211
+ table_meta.table_name, hints=options.sql.settings
212
+ )
144
213
  table_name = build_temp_table_name(self.session_id, t.key)
145
- table_obj = self._odps_entry.create_table(table_name, schema)
214
+ table_obj = self._odps_entry.create_table(
215
+ table_name,
216
+ table_schema,
217
+ lifecycle=options.session.temp_table_lifecycle,
218
+ hints=options.sql.settings,
219
+ )
146
220
 
147
221
  data = t.op.get_data()
148
222
  batch_size = options.session.upload_batch_size
149
223
 
150
224
  if len(data):
151
- halo_client = HaloTableIO(self._odps_entry)
152
- with halo_client.open_writer(table_obj.full_table_name) as writer:
225
+ table_client = ODPSTableIO(self._odps_entry)
226
+ with table_client.open_writer(table_obj.full_table_name) as writer:
153
227
  for batch_start in range(0, len(data), batch_size):
154
228
  if isinstance(data, pd.Index):
155
229
  batch = data[batch_start : batch_start + batch_size]
@@ -172,13 +246,35 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
172
246
  read_tileable.name = t.name
173
247
  else: # INDEX_TYPE
174
248
  if list(read_tileable.names) != list(t.names):
175
- read_tileable.names = t.names
249
+ read_tileable.rename(t.names, inplace=True)
176
250
  read_tileable._key = t.key
177
251
  read_tileable.params = t.params
178
252
  return read_tileable.data
179
253
 
254
+ def _upload_and_get_vol_read_tileable(
255
+ self, t: TileableType
256
+ ) -> Optional[TileableType]:
257
+ vol_name = build_session_volume_name(self.session_id)
258
+ writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
259
+ io_handler = get_object_io_handler(t)
260
+ io_handler().write_object(writer, t, t.op.data)
261
+ return build_fetch(t).data
262
+
263
+ def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
264
+ if (
265
+ not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
266
+ or t.op.get_data() is None
267
+ or t.inputs
268
+ ):
269
+ return None
270
+ with sync_pyodps_options():
271
+ if isinstance(t.op, PandasDataSourceOperator):
272
+ return self._upload_and_get_table_read_tileable(t)
273
+ else:
274
+ return self._upload_and_get_vol_read_tileable(t)
275
+
180
276
  @enter_mode(kernel=True, build=True)
181
- def _scan_and_replace_pandas_sources(
277
+ def _scan_and_replace_local_sources(
182
278
  self, graph: TileableGraph
183
279
  ) -> Dict[TileableType, TileableType]:
184
280
  """Replaces Pandas data sources with temp table sources in the graph"""
@@ -199,7 +295,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
199
295
 
200
296
  for succ in successors:
201
297
  graph.add_edge(replaced, succ)
202
- succ.inputs = [replacements.get(t, t) for t in succ.inputs]
298
+ succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
203
299
 
204
300
  graph.results = [replacements.get(t, t) for t in graph.results]
205
301
  return replacements
@@ -207,16 +303,41 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
207
303
  @enter_mode(kernel=True, build=True)
208
304
  def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
209
305
  """Generate ResultInfo structs from generated temp tables"""
306
+ vol_name = build_session_volume_name(self.session_id)
307
+
210
308
  infos = dict()
211
309
  for t in tileables:
212
310
  key = t.key
213
- if not isinstance(t.op, DataFrameReadODPSTable):
214
- if not isinstance(t.inputs[0].op, DataFrameReadODPSTable):
215
- continue
216
- t = t.inputs[0]
217
- infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
311
+ if isinstance(t.op, DataFrameReadODPSTable):
312
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
313
+ else:
314
+ if isinstance(t.op, Fetch):
315
+ infos[key] = ODPSVolumeResultInfo(
316
+ volume_name=vol_name, volume_path=t.key
317
+ )
318
+ elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
319
+ t = t.inputs[0]
320
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
218
321
  return infos
219
322
 
323
+ def _get_diff_settings(self) -> Dict[str, Any]:
324
+ new_settings = self._caller.get_settings_to_upload()
325
+ if not self._last_settings: # pragma: no cover
326
+ self._last_settings = copy.deepcopy(new_settings)
327
+ return new_settings
328
+
329
+ update = dict()
330
+ for k in new_settings.keys():
331
+ old_item = self._last_settings.get(k)
332
+ new_item = new_settings.get(k)
333
+ try:
334
+ if old_item != new_item:
335
+ update[k] = new_item
336
+ except: # noqa: E722 # nosec # pylint: disable=bare-except
337
+ update[k] = new_item
338
+ self._last_settings = copy.deepcopy(new_settings)
339
+ return update
340
+
220
341
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
221
342
  tileables = [
222
343
  tileable.data if isinstance(tileable, Entity) else tileable
@@ -226,7 +347,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
226
347
  tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
227
348
  self, tileables, tileable_to_copied
228
349
  )
229
- source_replacements = self._scan_and_replace_pandas_sources(tileable_graph)
350
+ source_replacements = self._scan_and_replace_local_sources(tileable_graph)
230
351
 
231
352
  # we need to manage uploaded data sources with refcounting mechanism
232
353
  # as nodes in tileable_graph are copied, we need to use original nodes
@@ -236,9 +357,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
236
357
 
237
358
  replaced_infos = self._get_input_infos(list(source_replacements.values()))
238
359
  dag_info = await self.ensure_async_call(
239
- self._caller.submit_dag, tileable_graph, replaced_infos
360
+ self._caller.submit_dag,
361
+ tileable_graph,
362
+ replaced_infos,
363
+ self._get_diff_settings(),
240
364
  )
241
365
 
366
+ await self._show_logview_address(dag_info.dag_id)
367
+
242
368
  progress = Progress()
243
369
  profiling = Profiling()
244
370
  aio_task = asyncio.create_task(
@@ -256,25 +382,55 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
256
382
  self, dag_info: DagInfo, tileables: List, progress: Progress
257
383
  ):
258
384
  start_time = time.time()
385
+ session_id = dag_info.session_id
259
386
  dag_id = dag_info.dag_id
260
- wait_timeout = 10
387
+ server_no_response_time = None
261
388
  with enter_mode(build=True, kernel=True):
262
389
  key_to_tileables = {t.key: t for t in tileables}
263
-
390
+ timeout_val = 0.1
264
391
  try:
265
392
  while True:
266
393
  elapsed_time = time.time() - start_time
394
+ next_timeout_val = min(timeout_val * 2, self._pull_interval)
267
395
  timeout_val = (
268
- min(self.timeout - elapsed_time, wait_timeout)
396
+ min(self.timeout - elapsed_time, next_timeout_val)
269
397
  if self.timeout
270
- else wait_timeout
398
+ else next_timeout_val
271
399
  )
272
400
  if timeout_val <= 0:
273
401
  raise TimeoutError("Running DAG timed out")
274
402
 
275
- dag_info: DagInfo = await self.ensure_async_call(
276
- self._caller.get_dag_info, dag_id
277
- )
403
+ try:
404
+ dag_info: DagInfo = await self.ensure_async_call(
405
+ self._caller.get_dag_info, dag_id
406
+ )
407
+ server_no_response_time = None
408
+ except (NoTaskServerResponseError, SessionAlreadyClosedError) as ex:
409
+ # when we receive SessionAlreadyClosedError after NoTaskServerResponseError
410
+ # is received, it is possible that task server is restarted and
411
+ # SessionAlreadyClosedError might be flaky. Otherwise, the error
412
+ # should be raised.
413
+ if (
414
+ isinstance(ex, SessionAlreadyClosedError)
415
+ and not server_no_response_time
416
+ ):
417
+ raise
418
+ server_no_response_time = server_no_response_time or time.time()
419
+ if (
420
+ time.time() - server_no_response_time
421
+ > options.client.task_restart_timeout
422
+ ):
423
+ raise MaxFrameError(
424
+ "Failed to get valid response from service. "
425
+ f"Session {self._session_id}."
426
+ ) from None
427
+ await asyncio.sleep(timeout_val)
428
+ continue
429
+
430
+ if dag_info is None:
431
+ raise SystemError(
432
+ f"Cannot find DAG with ID {dag_id} in session {session_id}"
433
+ )
278
434
  progress.value = dag_info.progress
279
435
  if dag_info.status != DagStatus.RUNNING:
280
436
  break
@@ -294,6 +450,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
294
450
 
295
451
  for key, result_info in dag_info.tileable_to_result_infos.items():
296
452
  t = key_to_tileables[key]
453
+ fetcher = get_fetcher_cls(result_info.result_type)(self._odps_entry)
454
+ await fetcher.update_tileable_meta(t, result_info)
297
455
  self._tileable_to_infos[t] = result_info
298
456
 
299
457
  def _get_data_tileable_and_indexes(
@@ -334,7 +492,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
334
492
  data_tileable, indexes = self._get_data_tileable_and_indexes(tileable)
335
493
  info = self._tileable_to_infos[data_tileable]
336
494
  fetcher = get_fetcher_cls(info.result_type)(self._odps_entry)
337
- results.append(await fetcher.fetch(tileable, info, indexes))
495
+ results.append(await fetcher.fetch(data_tileable, info, indexes))
338
496
  return results
339
497
 
340
498
  async def decref(self, *tileable_keys):
@@ -388,12 +546,32 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
388
546
  async def get_mutable_tensor(self, name: str):
389
547
  raise NotImplementedError
390
548
 
549
+ async def get_logview_address(self, hours=None) -> Optional[str]:
550
+ return await self.get_dag_logview_address(None, hours)
551
+
552
+ async def get_dag_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
553
+ return await self.ensure_async_call(
554
+ self._caller.get_logview_address, dag_id, hours
555
+ )
556
+
557
+ async def _show_logview_address(self, dag_id=None, hours=None):
558
+ identity = f"Session ID: {self._session_id}"
559
+ if dag_id:
560
+ identity += f", DAG ID: {dag_id}"
561
+
562
+ logview_addr = await self.get_dag_logview_address(dag_id, hours)
563
+ if logview_addr:
564
+ logger.info("%s, Logview: %s", identity, logview_addr)
565
+ else:
566
+ logger.info("%s, Logview address does not exist", identity)
567
+
391
568
 
392
569
  class MaxFrameRestCaller(MaxFrameServiceCaller):
393
570
  _client: FrameDriverClient
394
571
  _session_id: Optional[str]
395
572
 
396
- def __init__(self, client: FrameDriverClient):
573
+ def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
574
+ self._odps_entry = odps_entry
397
575
  self._client = client
398
576
  self._session_id = None
399
577
 
@@ -406,9 +584,14 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
406
584
  await self._client.delete_session(self._session_id)
407
585
 
408
586
  async def submit_dag(
409
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
587
+ self,
588
+ dag: TileableGraph,
589
+ managed_input_infos: Dict[str, ResultInfo] = None,
590
+ new_settings: Dict[str, Any] = None,
410
591
  ) -> DagInfo:
411
- return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
592
+ return await self._client.submit_dag(
593
+ self._session_id, dag, managed_input_infos, new_settings=new_settings
594
+ )
412
595
 
413
596
  async def get_dag_info(self, dag_id: str) -> DagInfo:
414
597
  return await self._client.get_dag_info(self._session_id, dag_id)
@@ -446,7 +629,7 @@ class MaxFrameRestSession(MaxFrameSession):
446
629
 
447
630
  @classmethod
448
631
  def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
449
- return MaxFrameRestCaller(FrameDriverClient(address))
632
+ return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
450
633
 
451
634
 
452
635
  def register_session_schemes(overwrite: bool = False):