maxframe 1.0.0rc3__cp310-cp310-macosx_10_9_universal2.whl → 1.1.0__cp310-cp310-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (112):
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +16 -1
  4. maxframe/conftest.py +52 -14
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  7. maxframe/core/operator/base.py +2 -0
  8. maxframe/dataframe/arithmetic/docstring.py +26 -2
  9. maxframe/dataframe/arithmetic/equal.py +4 -2
  10. maxframe/dataframe/arithmetic/greater.py +4 -2
  11. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  12. maxframe/dataframe/arithmetic/less.py +2 -2
  13. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  14. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  15. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  16. maxframe/dataframe/core.py +26 -2
  17. maxframe/dataframe/datasource/read_odps_query.py +116 -28
  18. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  19. maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
  20. maxframe/dataframe/datastore/to_odps.py +7 -0
  21. maxframe/dataframe/extensions/__init__.py +8 -0
  22. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  23. maxframe/dataframe/extensions/flatjson.py +131 -0
  24. maxframe/dataframe/extensions/flatmap.py +314 -0
  25. maxframe/dataframe/extensions/reshuffle.py +1 -1
  26. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  27. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  28. maxframe/dataframe/groupby/__init__.py +1 -0
  29. maxframe/dataframe/groupby/aggregation.py +1 -0
  30. maxframe/dataframe/groupby/apply.py +9 -1
  31. maxframe/dataframe/groupby/core.py +1 -1
  32. maxframe/dataframe/groupby/fill.py +4 -1
  33. maxframe/dataframe/groupby/getitem.py +6 -0
  34. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  35. maxframe/dataframe/groupby/transform.py +8 -2
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/loc.py +6 -4
  38. maxframe/dataframe/indexing/rename.py +11 -0
  39. maxframe/dataframe/initializer.py +11 -1
  40. maxframe/dataframe/merge/__init__.py +9 -1
  41. maxframe/dataframe/merge/concat.py +41 -31
  42. maxframe/dataframe/merge/merge.py +1 -1
  43. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  44. maxframe/dataframe/misc/apply.py +3 -0
  45. maxframe/dataframe/misc/drop_duplicates.py +23 -2
  46. maxframe/dataframe/misc/map.py +3 -1
  47. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  48. maxframe/dataframe/misc/transform.py +22 -13
  49. maxframe/dataframe/reduction/__init__.py +3 -0
  50. maxframe/dataframe/reduction/aggregation.py +1 -0
  51. maxframe/dataframe/reduction/median.py +56 -0
  52. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  53. maxframe/dataframe/statistics/quantile.py +8 -2
  54. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  55. maxframe/dataframe/tests/test_initializer.py +33 -2
  56. maxframe/dataframe/tests/test_utils.py +60 -0
  57. maxframe/dataframe/utils.py +110 -7
  58. maxframe/dataframe/window/expanding.py +5 -3
  59. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  60. maxframe/io/objects/tests/test_object_io.py +39 -12
  61. maxframe/io/odpsio/arrow.py +30 -2
  62. maxframe/io/odpsio/schema.py +28 -8
  63. maxframe/io/odpsio/tableio.py +55 -133
  64. maxframe/io/odpsio/tests/test_schema.py +40 -4
  65. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  66. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  67. maxframe/io/odpsio/volumeio.py +36 -6
  68. maxframe/learn/contrib/__init__.py +3 -1
  69. maxframe/learn/contrib/graph/__init__.py +15 -0
  70. maxframe/learn/contrib/graph/connected_components.py +215 -0
  71. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  73. maxframe/learn/contrib/llm/__init__.py +16 -0
  74. maxframe/learn/contrib/llm/core.py +54 -0
  75. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  76. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  77. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  78. maxframe/learn/contrib/llm/text.py +42 -0
  79. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  80. maxframe/learn/contrib/xgboost/predict.py +8 -39
  81. maxframe/learn/contrib/xgboost/train.py +4 -3
  82. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  83. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  84. maxframe/opcodes.py +10 -1
  85. maxframe/protocol.py +6 -1
  86. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  87. maxframe/serialization/core.pyx +13 -1
  88. maxframe/serialization/pandas.py +50 -20
  89. maxframe/serialization/serializables/core.py +24 -5
  90. maxframe/serialization/serializables/field_type.py +4 -1
  91. maxframe/serialization/serializables/tests/test_serializable.py +8 -1
  92. maxframe/serialization/tests/test_serial.py +2 -1
  93. maxframe/session.py +9 -2
  94. maxframe/tensor/__init__.py +19 -7
  95. maxframe/tensor/indexing/getitem.py +2 -0
  96. maxframe/tensor/merge/concatenate.py +23 -20
  97. maxframe/tensor/merge/vstack.py +5 -1
  98. maxframe/tensor/misc/transpose.py +1 -1
  99. maxframe/tests/utils.py +16 -0
  100. maxframe/udf.py +27 -0
  101. maxframe/utils.py +64 -14
  102. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
  103. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
  104. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
  105. maxframe_client/clients/framedriver.py +4 -1
  106. maxframe_client/fetcher.py +28 -10
  107. maxframe_client/session/consts.py +3 -0
  108. maxframe_client/session/odps.py +104 -20
  109. maxframe_client/session/task.py +42 -26
  110. maxframe_client/session/tests/test_task.py +0 -4
  111. maxframe_client/tests/test_session.py +44 -12
  112. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
@@ -14,16 +14,19 @@
14
14
 
15
15
  import abc
16
16
  import asyncio
17
+ import copy
17
18
  import logging
18
19
  import time
19
20
  import weakref
20
21
  from numbers import Integral
21
- from typing import Dict, List, Mapping, Optional, Tuple, Union
22
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
22
23
  from urllib.parse import urlparse
23
24
 
24
25
  import numpy as np
25
26
  import pandas as pd
26
27
  from odps import ODPS
28
+ from odps import options as odps_options
29
+ from odps.console import in_ipython_frontend
27
30
 
28
31
  from maxframe.config import options
29
32
  from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
@@ -65,6 +68,8 @@ from maxframe.utils import (
65
68
  ToThreadMixin,
66
69
  build_session_volume_name,
67
70
  build_temp_table_name,
71
+ str_to_bool,
72
+ sync_pyodps_options,
68
73
  )
69
74
 
70
75
  from ..clients.framedriver import FrameDriverClient
@@ -76,6 +81,45 @@ logger = logging.getLogger(__name__)
76
81
 
77
82
 
78
83
  class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
84
+ def get_settings_to_upload(self) -> Dict[str, Any]:
85
+ sql_settings = (odps_options.sql.settings or {}).copy()
86
+ sql_settings.update(options.sql.settings or {})
87
+
88
+ quota_name = options.session.quota_name or getattr(
89
+ odps_options, "quota_name", None
90
+ )
91
+ lifecycle = options.session.table_lifecycle or odps_options.lifecycle
92
+ temp_lifecycle = (
93
+ options.session.temp_table_lifecycle or odps_options.temp_lifecycle
94
+ )
95
+
96
+ enable_schema = options.session.enable_schema
97
+ default_schema = options.session.default_schema
98
+ if hasattr(self, "_odps_entry"):
99
+ default_schema = default_schema or self._odps_entry.schema
100
+
101
+ # use flags in sql settings
102
+ if sql_settings.get("odps.default.schema"):
103
+ default_schema = sql_settings["odps.default.schema"]
104
+ if str_to_bool(
105
+ sql_settings.get("odps.namespace.schema") or "false"
106
+ ) or str_to_bool(
107
+ sql_settings.get("odps.sql.allow.namespace.schema") or "false"
108
+ ):
109
+ enable_schema = True
110
+
111
+ mf_settings = dict(options.to_dict(remote_only=True).items())
112
+ mf_settings["sql.settings"] = sql_settings
113
+ mf_settings["session.table_lifecycle"] = lifecycle
114
+ mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
115
+ mf_settings["session.quota_name"] = quota_name
116
+ if enable_schema is not None:
117
+ mf_settings["session.enable_schema"] = enable_schema
118
+ if options.session.enable_high_availability is None:
119
+ mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
120
+ mf_settings["session.default_schema"] = default_schema or "default"
121
+ return mf_settings
122
+
79
123
  @abc.abstractmethod
80
124
  def create_session(self) -> SessionInfo:
81
125
  raise NotImplementedError
@@ -86,7 +130,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
86
130
 
87
131
  @abc.abstractmethod
88
132
  def submit_dag(
89
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
133
+ self,
134
+ dag: TileableGraph,
135
+ managed_input_infos: Dict[str, ResultInfo],
136
+ new_settings: Dict[str, Any] = None,
90
137
  ) -> DagInfo:
91
138
  raise NotImplementedError
92
139
 
@@ -140,6 +187,9 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
140
187
  self._tileable_to_infos = weakref.WeakKeyDictionary()
141
188
 
142
189
  self._caller = self._create_caller(odps_entry, address, **kwargs)
190
+ self._last_settings = None
191
+ self._pull_interval = 1 if in_ipython_frontend() else 3
192
+ self._replace_internal_host = kwargs.get("replace_internal_host", True)
143
193
 
144
194
  @classmethod
145
195
  def _create_caller(
@@ -149,13 +199,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
149
199
 
150
200
  async def _init(self, _address: str):
151
201
  session_info = await self.ensure_async_call(self._caller.create_session)
202
+ self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
152
203
  self._session_id = session_info.session_id
153
204
  await self._show_logview_address()
154
205
 
155
206
  def _upload_and_get_table_read_tileable(
156
207
  self, t: TileableType
157
208
  ) -> Optional[TileableType]:
158
- schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
209
+ table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
159
210
  if self._odps_entry.exist_table(table_meta.table_name):
160
211
  self._odps_entry.delete_table(
161
212
  table_meta.table_name, hints=options.sql.settings
@@ -163,7 +214,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
163
214
  table_name = build_temp_table_name(self.session_id, t.key)
164
215
  table_obj = self._odps_entry.create_table(
165
216
  table_name,
166
- schema,
217
+ table_schema,
167
218
  lifecycle=options.session.temp_table_lifecycle,
168
219
  hints=options.sql.settings,
169
220
  )
@@ -205,7 +256,12 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
205
256
  self, t: TileableType
206
257
  ) -> Optional[TileableType]:
207
258
  vol_name = build_session_volume_name(self.session_id)
208
- writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
259
+ writer = ODPSVolumeWriter(
260
+ self._odps_entry,
261
+ vol_name,
262
+ t.key,
263
+ replace_internal_host=self._replace_internal_host,
264
+ )
209
265
  io_handler = get_object_io_handler(t)
210
266
  io_handler().write_object(writer, t, t.op.data)
211
267
  return build_fetch(t).data
@@ -217,10 +273,11 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
217
273
  or t.inputs
218
274
  ):
219
275
  return None
220
- if isinstance(t.op, PandasDataSourceOperator):
221
- return self._upload_and_get_table_read_tileable(t)
222
- else:
223
- return self._upload_and_get_vol_read_tileable(t)
276
+ with sync_pyodps_options():
277
+ if isinstance(t.op, PandasDataSourceOperator):
278
+ return self._upload_and_get_table_read_tileable(t)
279
+ else:
280
+ return self._upload_and_get_vol_read_tileable(t)
224
281
 
225
282
  @enter_mode(kernel=True, build=True)
226
283
  def _scan_and_replace_local_sources(
@@ -244,7 +301,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
244
301
 
245
302
  for succ in successors:
246
303
  graph.add_edge(replaced, succ)
247
- succ.inputs = [replacements.get(t, t) for t in succ.inputs]
304
+ succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
248
305
 
249
306
  graph.results = [replacements.get(t, t) for t in graph.results]
250
307
  return replacements
@@ -269,6 +326,24 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
269
326
  infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
270
327
  return infos
271
328
 
329
+ def _get_diff_settings(self) -> Dict[str, Any]:
330
+ new_settings = self._caller.get_settings_to_upload()
331
+ if not self._last_settings: # pragma: no cover
332
+ self._last_settings = copy.deepcopy(new_settings)
333
+ return new_settings
334
+
335
+ update = dict()
336
+ for k in new_settings.keys():
337
+ old_item = self._last_settings.get(k)
338
+ new_item = new_settings.get(k)
339
+ try:
340
+ if old_item != new_item:
341
+ update[k] = new_item
342
+ except: # noqa: E722 # nosec # pylint: disable=bare-except
343
+ update[k] = new_item
344
+ self._last_settings = copy.deepcopy(new_settings)
345
+ return update
346
+
272
347
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
273
348
  tileables = [
274
349
  tileable.data if isinstance(tileable, Entity) else tileable
@@ -288,7 +363,10 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
288
363
 
289
364
  replaced_infos = self._get_input_infos(list(source_replacements.values()))
290
365
  dag_info = await self.ensure_async_call(
291
- self._caller.submit_dag, tileable_graph, replaced_infos
366
+ self._caller.submit_dag,
367
+ tileable_graph,
368
+ replaced_infos,
369
+ self._get_diff_settings(),
292
370
  )
293
371
 
294
372
  await self._show_logview_address(dag_info.dag_id)
@@ -312,18 +390,18 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
312
390
  start_time = time.time()
313
391
  session_id = dag_info.session_id
314
392
  dag_id = dag_info.dag_id
315
- wait_timeout = 10
316
393
  server_no_response_time = None
317
394
  with enter_mode(build=True, kernel=True):
318
395
  key_to_tileables = {t.key: t for t in tileables}
319
-
396
+ timeout_val = 0.1
320
397
  try:
321
398
  while True:
322
399
  elapsed_time = time.time() - start_time
400
+ next_timeout_val = min(timeout_val * 2, self._pull_interval)
323
401
  timeout_val = (
324
- min(self.timeout - elapsed_time, wait_timeout)
402
+ min(self.timeout - elapsed_time, next_timeout_val)
325
403
  if self.timeout
326
- else wait_timeout
404
+ else next_timeout_val
327
405
  )
328
406
  if timeout_val <= 0:
329
407
  raise TimeoutError("Running DAG timed out")
@@ -498,7 +576,8 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
498
576
  _client: FrameDriverClient
499
577
  _session_id: Optional[str]
500
578
 
501
- def __init__(self, client: FrameDriverClient):
579
+ def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
580
+ self._odps_entry = odps_entry
502
581
  self._client = client
503
582
  self._session_id = None
504
583
 
@@ -511,9 +590,14 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
511
590
  await self._client.delete_session(self._session_id)
512
591
 
513
592
  async def submit_dag(
514
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
593
+ self,
594
+ dag: TileableGraph,
595
+ managed_input_infos: Dict[str, ResultInfo] = None,
596
+ new_settings: Dict[str, Any] = None,
515
597
  ) -> DagInfo:
516
- return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
598
+ return await self._client.submit_dag(
599
+ self._session_id, dag, managed_input_infos, new_settings=new_settings
600
+ )
517
601
 
518
602
  async def get_dag_info(self, dag_id: str) -> DagInfo:
519
603
  return await self._client.get_dag_info(self._session_id, dag_id)
@@ -546,12 +630,12 @@ class MaxFrameRestSession(MaxFrameSession):
546
630
  real_endpoint = address.replace(f"{parsed_endpoint.scheme}://", f"{scheme}://")
547
631
 
548
632
  super().__init__(
549
- real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout
633
+ real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout, **kwargs
550
634
  )
551
635
 
552
636
  @classmethod
553
637
  def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
554
- return MaxFrameRestCaller(FrameDriverClient(address))
638
+ return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
555
639
 
556
640
 
557
641
  def register_session_schemes(overwrite: bool = False):
@@ -16,12 +16,12 @@ import base64
16
16
  import json
17
17
  import logging
18
18
  import time
19
- from typing import Dict, List, Optional, Type, Union
19
+ from typing import Any, Dict, List, Optional, Type, Union
20
20
 
21
21
  import msgpack
22
22
  from odps import ODPS
23
23
  from odps import options as odps_options
24
- from odps.errors import parse_instance_error
24
+ from odps.errors import EmptyTaskInfoError, parse_instance_error
25
25
  from odps.models import Instance, MaxFrameTask
26
26
 
27
27
  from maxframe.config import options
@@ -36,6 +36,7 @@ except ImportError:
36
36
  mf_version = None
37
37
 
38
38
  from .consts import (
39
+ EMPTY_RESPONSE_RETRY_COUNT,
39
40
  MAXFRAME_DEFAULT_PROTOCOL,
40
41
  MAXFRAME_OUTPUT_JSON_FORMAT,
41
42
  MAXFRAME_OUTPUT_MAXFRAME_FORMAT,
@@ -92,6 +93,10 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
92
93
  self._nested = True
93
94
  self._instance = odps_entry.get_instance(nested_instance_id)
94
95
 
96
+ @property
97
+ def instance(self):
98
+ return self._instance
99
+
95
100
  def _deserial_task_info_result(
96
101
  self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
97
102
  ):
@@ -120,21 +125,9 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
120
125
  )
121
126
 
122
127
  def _create_maxframe_task(self) -> MaxFrameTask:
123
- task = MaxFrameTask(
124
- name=self._task_name,
125
- major_version=self._major_version,
126
- service_endpoint=self._odps_entry.endpoint,
127
- )
128
-
129
- # merge sql options
130
- sql_settings = (odps_options.sql.settings or {}).copy()
131
- sql_settings.update(options.sql.settings or {})
132
-
133
- mf_settings = dict(options.to_dict(remote_only=True).items())
134
- mf_settings["sql.settings"] = sql_settings
135
-
128
+ task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
136
129
  mf_opts = {
137
- "odps.maxframe.settings": json.dumps(mf_settings),
130
+ "odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
138
131
  "odps.maxframe.output_format": self._output_format,
139
132
  }
140
133
  if mf_version:
@@ -189,18 +182,27 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
189
182
  interval = min(max_interval, interval * 2)
190
183
 
191
184
  def _put_task_info(self, method_name: str, json_data: dict):
192
- resp_data = self._instance.put_task_info(
193
- self._task_name, method_name, json.dumps(json_data)
194
- )
195
- if not resp_data:
196
- raise NoTaskServerResponseError(f"No response for request {method_name}")
197
- return resp_data
185
+ for trial in range(EMPTY_RESPONSE_RETRY_COUNT):
186
+ try:
187
+ return self._instance.put_task_info(
188
+ self._task_name,
189
+ method_name,
190
+ json.dumps(json_data),
191
+ raise_empty=True,
192
+ )
193
+ except EmptyTaskInfoError as ex:
194
+ # retry when server returns HTTP 204, which is designed for retry
195
+ if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
196
+ raise NoTaskServerResponseError(
197
+ f"No response for request {method_name}. "
198
+ f"Instance ID: {self._instance.id}. "
199
+ f"Request ID: {ex.request_id}"
200
+ ) from None
201
+ time.sleep(0.5)
198
202
 
199
203
  def get_session(self) -> SessionInfo:
200
204
  req_data = {"output_format": self._output_format}
201
- serialized = self._instance.put_task_info(
202
- self._task_name, MAXFRAME_TASK_GET_SESSION_METHOD, json.dumps(req_data)
203
- )
205
+ serialized = self._put_task_info(MAXFRAME_TASK_GET_SESSION_METHOD, req_data)
204
206
  info: SessionInfo = self._deserial_task_info_result(serialized, SessionInfo)
205
207
  info.session_id = self._instance.id
206
208
  return info
@@ -217,13 +219,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
217
219
  self,
218
220
  dag: TileableGraph,
219
221
  managed_input_infos: Optional[Dict[str, ResultInfo]] = None,
222
+ new_settings: Dict[str, Any] = None,
220
223
  ) -> DagInfo:
224
+ new_settings_value = {
225
+ "odps.maxframe.settings": json.dumps(new_settings),
226
+ }
221
227
  req_data = {
222
228
  "protocol": MAXFRAME_DEFAULT_PROTOCOL,
223
229
  "dag": base64.b64encode(serialize_serializable(dag)).decode(),
224
230
  "managed_input_infos": base64.b64encode(
225
231
  serialize_serializable(managed_input_infos)
226
232
  ).decode(),
233
+ "new_settings": json.dumps(new_settings_value),
227
234
  "output_format": self._output_format,
228
235
  }
229
236
  res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
@@ -276,7 +283,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
276
283
  class MaxFrameTaskSession(MaxFrameSession):
277
284
  schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
278
285
 
279
- _instance: Instance
286
+ _caller: MaxFrameInstanceCaller
280
287
 
281
288
  @classmethod
282
289
  def _create_caller(
@@ -296,6 +303,15 @@ class MaxFrameTaskSession(MaxFrameSession):
296
303
  **kwargs,
297
304
  )
298
305
 
306
+ @property
307
+ def closed(self) -> bool:
308
+ if super().closed:
309
+ return True
310
+ if not self._caller or not self._caller.instance:
311
+ # session not initialized yet
312
+ return False
313
+ return self._caller.instance.is_terminated()
314
+
299
315
 
300
316
  def register_session_schemes(overwrite: bool = False):
301
317
  MaxFrameTaskSession.register_schemes(overwrite=overwrite)
@@ -53,10 +53,6 @@ def test_maxframe_instance_caller_creating_session():
53
53
  assert property_node.find("Name").text == "settings"
54
54
  setting_dict = json.loads(property_node.find("Value").text)
55
55
  assert setting_dict["odps.task.major.version"] == "test_version"
56
- assert (
57
- setting_dict["odps.service.endpoint"]
58
- == "http://100.69.248.78:8002/odps_dailyrunnew"
59
- )
60
56
 
61
57
  assert setting_dict["odps.maxframe.output_format"] == "json"
62
58
  maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import time
16
- from typing import Dict
16
+ from typing import Any, Dict
17
17
 
18
18
  import mock
19
19
  import numpy as np
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
31
31
  from maxframe.protocol import ResultInfo
32
32
  from maxframe.serialization import RemoteException
33
33
  from maxframe.session import new_session
34
- from maxframe.tests.utils import tn
34
+ from maxframe.tests.utils import ensure_table_deleted, tn
35
35
  from maxframe.utils import build_temp_table_name
36
36
  from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
37
37
  framedriver_app,
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
86
86
  session_id: str,
87
87
  dag: TileableGraph,
88
88
  managed_input_infos: Dict[str, ResultInfo] = None,
89
+ new_settings: Dict[str, Any] = None,
89
90
  ):
90
91
  assert len(dag) == 2
91
- return await original_submit_dag(self, session_id, dag, managed_input_infos)
92
+ return await original_submit_dag(
93
+ self, session_id, dag, managed_input_infos, new_settings
94
+ )
92
95
 
93
96
  no_task_server_raised = False
94
97
  original_get_dag_info = MaxFrameRestCaller.get_dag_info
@@ -130,11 +133,19 @@ def test_simple_run_dataframe(start_mock_session):
130
133
  )
131
134
  assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
132
135
  del df
133
- time.sleep(5)
134
- assert not odps_entry.exist_table(
135
- build_temp_table_name(start_mock_session, intermediate_key)
136
+ ensure_table_deleted(
137
+ odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
136
138
  )
137
- assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
139
+ ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
140
+
141
+
142
+ def test_run_and_fetch_slice(start_mock_session):
143
+ pd_df = pd.DataFrame(np.random.rand(1000, 5), columns=list("ABCDE"))
144
+ df = md.DataFrame(pd_df)
145
+ result = df.execute()
146
+
147
+ sliced = result.head(10).fetch()
148
+ assert len(sliced) == 10
138
149
 
139
150
 
140
151
  def test_run_empty_table(start_mock_session):
@@ -157,6 +168,25 @@ def test_run_empty_table(start_mock_session):
157
168
  empty_table.drop()
158
169
 
159
170
 
171
+ def test_run_odps_query_without_schema(start_mock_session):
172
+ odps_entry = ODPS.from_environments()
173
+
174
+ table_name = tn("test_query_without_schema")
175
+ odps_entry.delete_table(table_name, if_exists=True)
176
+ test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
177
+
178
+ with test_table.open_writer() as writer:
179
+ writer.write([123, 456])
180
+
181
+ df = md.read_odps_query(
182
+ f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
183
+ )
184
+ executed = df.execute().fetch()
185
+ assert len(executed.dtypes) == 3
186
+
187
+ test_table.drop()
188
+
189
+
160
190
  def test_run_dataframe_with_pd_source(start_mock_session):
161
191
  odps_entry = ODPS.from_environments()
162
192
 
@@ -189,7 +219,7 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
189
219
  table_name = build_temp_table_name(start_mock_session, "tmp_save")
190
220
  table_obj = odps_entry.get_table(table_name)
191
221
  try:
192
- md.to_odps_table(md.DataFrame(pd_df), table_obj).execute().fetch()
222
+ md.to_odps_table(md.DataFrame(pd_df), table_obj, lifecycle=1).execute().fetch()
193
223
  with table_obj.open_reader() as reader:
194
224
  result_df = reader.to_pandas()
195
225
  assert len(result_df) == 10
@@ -237,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
237
267
  pd.testing.assert_series_equal(pd_result, result)
238
268
 
239
269
  del s1
240
- time.sleep(5)
241
- assert not odps_entry.exist_table(
242
- build_temp_table_name(start_mock_session, src_key)
270
+ ensure_table_deleted(
271
+ odps_entry, build_temp_table_name(start_mock_session, src_key)
243
272
  )
244
273
  finally:
245
274
  odps_entry.delete_table(
@@ -256,7 +285,10 @@ def test_execute_with_tensor(oss_config, start_mock_session):
256
285
 
257
286
  result = (df - [1, 2]).execute().fetch()
258
287
  expected = pd_df - [1, 2]
259
- pd.testing.assert_frame_equal(result, expected)
288
+ # TODO: currently the record order in tensor reading from table is the index
289
+ # sorting order
290
+ expected.sort_index(axis=0, inplace=True)
291
+ pd.testing.assert_frame_equal(result, expected, check_like=True)
260
292
 
261
293
 
262
294
  def test_run_remote_success(oss_config, start_mock_session):