maxframe 1.0.0rc2__cp311-cp311-win32.whl → 1.0.0rc4__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (134) hide show
  1. maxframe/_utils.cp311-win32.pyd +0 -0
  2. maxframe/codegen.py +4 -2
  3. maxframe/config/config.py +28 -9
  4. maxframe/config/validators.py +42 -12
  5. maxframe/conftest.py +56 -14
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/executable.py +1 -1
  9. maxframe/core/entity/objects.py +45 -2
  10. maxframe/core/entity/output_types.py +0 -3
  11. maxframe/core/entity/tests/test_objects.py +43 -0
  12. maxframe/core/entity/tileables.py +5 -78
  13. maxframe/core/graph/__init__.py +2 -2
  14. maxframe/core/graph/builder/__init__.py +0 -1
  15. maxframe/core/graph/builder/base.py +5 -4
  16. maxframe/core/graph/builder/tileable.py +4 -4
  17. maxframe/core/graph/builder/utils.py +4 -8
  18. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  19. maxframe/core/graph/entity.py +9 -33
  20. maxframe/core/operator/__init__.py +2 -9
  21. maxframe/core/operator/base.py +3 -5
  22. maxframe/core/operator/objects.py +0 -9
  23. maxframe/core/operator/utils.py +55 -0
  24. maxframe/dataframe/arithmetic/docstring.py +26 -2
  25. maxframe/dataframe/arithmetic/equal.py +4 -2
  26. maxframe/dataframe/arithmetic/greater.py +4 -2
  27. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  28. maxframe/dataframe/arithmetic/less.py +2 -2
  29. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  30. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  31. maxframe/dataframe/core.py +2 -0
  32. maxframe/dataframe/datasource/read_odps_query.py +67 -8
  33. maxframe/dataframe/datasource/read_odps_table.py +4 -2
  34. maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
  35. maxframe/dataframe/datastore/to_odps.py +8 -1
  36. maxframe/dataframe/extensions/__init__.py +3 -0
  37. maxframe/dataframe/extensions/flatmap.py +326 -0
  38. maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
  39. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  40. maxframe/dataframe/indexing/rename.py +11 -0
  41. maxframe/dataframe/initializer.py +11 -1
  42. maxframe/dataframe/misc/drop_duplicates.py +18 -1
  43. maxframe/dataframe/operators.py +1 -17
  44. maxframe/dataframe/reduction/core.py +2 -2
  45. maxframe/dataframe/tests/test_initializer.py +33 -2
  46. maxframe/io/objects/__init__.py +24 -0
  47. maxframe/io/objects/core.py +140 -0
  48. maxframe/io/objects/tensor.py +76 -0
  49. maxframe/io/objects/tests/__init__.py +13 -0
  50. maxframe/io/objects/tests/test_object_io.py +97 -0
  51. maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
  52. maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
  53. maxframe/{odpsio → io/odpsio}/schema.py +10 -8
  54. maxframe/{odpsio → io/odpsio}/tableio.py +50 -38
  55. maxframe/io/odpsio/tests/__init__.py +13 -0
  56. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -7
  57. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
  58. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  59. maxframe/io/odpsio/volumeio.py +63 -0
  60. maxframe/learn/contrib/__init__.py +2 -1
  61. maxframe/learn/contrib/graph/__init__.py +15 -0
  62. maxframe/learn/contrib/graph/connected_components.py +215 -0
  63. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  64. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  65. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  66. maxframe/learn/contrib/xgboost/core.py +87 -2
  67. maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
  68. maxframe/learn/contrib/xgboost/predict.py +27 -44
  69. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  70. maxframe/learn/contrib/xgboost/train.py +27 -16
  71. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  72. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  73. maxframe/opcodes.py +3 -0
  74. maxframe/protocol.py +7 -16
  75. maxframe/remote/core.py +4 -8
  76. maxframe/serialization/__init__.py +1 -0
  77. maxframe/serialization/core.cp311-win32.pyd +0 -0
  78. maxframe/session.py +9 -2
  79. maxframe/tensor/__init__.py +10 -2
  80. maxframe/tensor/arithmetic/isclose.py +1 -0
  81. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  82. maxframe/tensor/core.py +5 -136
  83. maxframe/tensor/datasource/array.py +3 -0
  84. maxframe/tensor/datasource/full.py +1 -1
  85. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  86. maxframe/tensor/indexing/flatnonzero.py +1 -1
  87. maxframe/tensor/indexing/getitem.py +2 -0
  88. maxframe/tensor/merge/__init__.py +2 -0
  89. maxframe/tensor/merge/concatenate.py +101 -0
  90. maxframe/tensor/merge/tests/test_merge.py +30 -1
  91. maxframe/tensor/merge/vstack.py +74 -0
  92. maxframe/tensor/{base → misc}/__init__.py +2 -0
  93. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  94. maxframe/tensor/misc/atleast_2d.py +70 -0
  95. maxframe/tensor/misc/atleast_3d.py +85 -0
  96. maxframe/tensor/misc/tests/__init__.py +13 -0
  97. maxframe/tensor/{base → misc}/transpose.py +22 -18
  98. maxframe/tensor/operators.py +1 -7
  99. maxframe/tensor/random/core.py +1 -1
  100. maxframe/tensor/reduction/count_nonzero.py +1 -0
  101. maxframe/tensor/reduction/mean.py +1 -0
  102. maxframe/tensor/reduction/nanmean.py +1 -0
  103. maxframe/tensor/reduction/nanvar.py +2 -0
  104. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  105. maxframe/tensor/reduction/var.py +2 -0
  106. maxframe/tensor/utils.py +2 -22
  107. maxframe/typing_.py +4 -1
  108. maxframe/udf.py +8 -9
  109. maxframe/utils.py +49 -73
  110. maxframe-1.0.0rc4.dist-info/METADATA +104 -0
  111. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +129 -114
  112. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +1 -1
  113. maxframe_client/fetcher.py +33 -50
  114. maxframe_client/session/consts.py +3 -0
  115. maxframe_client/session/graph.py +8 -2
  116. maxframe_client/session/odps.py +134 -27
  117. maxframe_client/session/task.py +58 -20
  118. maxframe_client/tests/test_fetcher.py +1 -1
  119. maxframe_client/tests/test_session.py +27 -3
  120. maxframe/core/entity/chunks.py +0 -68
  121. maxframe/core/entity/fuse.py +0 -73
  122. maxframe/core/graph/builder/chunk.py +0 -430
  123. maxframe/odpsio/volumeio.py +0 -95
  124. maxframe-1.0.0rc2.dist-info/METADATA +0 -177
  125. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  126. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  127. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  128. /maxframe/tensor/{base → misc}/astype.py +0 -0
  129. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  130. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  131. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  132. /maxframe/tensor/{base → misc}/unique.py +0 -0
  133. /maxframe/tensor/{base → misc}/where.py +0 -0
  134. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
@@ -19,14 +19,18 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
19
19
  import pandas as pd
20
20
  import pyarrow as pa
21
21
  from odps import ODPS
22
- from odps.models import ExternalVolume, PartedVolume
22
+ from odps.models import ExternalVolume
23
23
  from odps.tunnel import TableTunnel
24
- from tornado import httpclient
25
24
 
26
25
  from maxframe.core import OBJECT_TYPE
27
26
  from maxframe.dataframe.core import DATAFRAME_TYPE
28
- from maxframe.lib import wrapped_pickle as pickle
29
- from maxframe.odpsio import ODPSTableIO, arrow_to_pandas, build_dataframe_table_meta
27
+ from maxframe.io.objects import get_object_io_handler
28
+ from maxframe.io.odpsio import (
29
+ ODPSTableIO,
30
+ ODPSVolumeReader,
31
+ arrow_to_pandas,
32
+ build_dataframe_table_meta,
33
+ )
30
34
  from maxframe.protocol import (
31
35
  DataFrameTableMeta,
32
36
  ODPSTableResultInfo,
@@ -36,7 +40,7 @@ from maxframe.protocol import (
36
40
  )
37
41
  from maxframe.tensor.core import TENSOR_TYPE
38
42
  from maxframe.typing_ import PandasObjectTypes, TileableType
39
- from maxframe.utils import ToThreadMixin
43
+ from maxframe.utils import ToThreadMixin, sync_pyodps_options
40
44
 
41
45
  _result_fetchers: Dict[ResultType, Type["ResultFetcher"]] = dict()
42
46
 
@@ -116,13 +120,15 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
116
120
 
117
121
  if tileable.shape and any(pd.isna(x) for x in tileable.shape):
118
122
  part_specs = [None] if not info.partition_specs else info.partition_specs
119
- tunnel = TableTunnel(self._odps_entry)
120
- total_records = 0
121
- for part_spec in part_specs:
122
- session = tunnel.create_download_session(
123
- info.full_table_name, part_spec
124
- )
125
- total_records += session.count
123
+
124
+ with sync_pyodps_options():
125
+ table = self._odps_entry.get_table(info.full_table_name)
126
+ tunnel = TableTunnel(self._odps_entry)
127
+ total_records = 0
128
+ for part_spec in part_specs:
129
+ session = tunnel.create_download_session(table, part_spec)
130
+ total_records += session.count
131
+
126
132
  new_shape_list = list(tileable.shape)
127
133
  new_shape_list[0] = total_records
128
134
  tileable.params = {"shape": tuple(new_shape_list)}
@@ -222,47 +228,24 @@ class ODPSVolumeFetcher(ToThreadMixin, ResultFetcher):
222
228
  ) -> None:
223
229
  return
224
230
 
225
- async def _read_parted_volume_data(
226
- self, volume: PartedVolume, partition: str, file_name: str
227
- ) -> bytes:
228
- def sync_read():
229
- with volume.open_reader(partition, file_name) as reader:
230
- return reader.read()
231
-
232
- return await self.to_thread(sync_read)
233
-
234
- async def _read_external_volume_data(
235
- self, volume: ExternalVolume, path: str, file_name: str
236
- ) -> bytes:
237
- signed_url = await self.to_thread(
238
- volume.get_sign_url, path + "/" + file_name, "GET"
239
- )
240
- http_client = httpclient.AsyncHTTPClient()
241
-
242
- resp = await http_client.fetch(signed_url)
243
- if hasattr(resp, "status_code") and resp.code >= 400:
244
- try:
245
- import oss2.exceptions
246
-
247
- oss_exc = oss2.exceptions.make_exception(resp.body)
248
- raise oss_exc
249
- except ImportError:
250
- raise SystemError(resp.body)
251
- return resp.body
231
+ async def _fetch_object(
232
+ self,
233
+ tileable: TileableType,
234
+ info: ODPSVolumeResultInfo,
235
+ indexes: List[Union[Integral, slice]],
236
+ ) -> Any:
237
+ def volume_fetch_func():
238
+ reader = ODPSVolumeReader(
239
+ self._odps_entry, info.volume_name, info.volume_path
240
+ )
241
+ io_handler = get_object_io_handler(tileable)()
242
+ return io_handler.read_object(reader, tileable, indexes)
252
243
 
253
- async def _fetch_object(self, info: ODPSVolumeResultInfo) -> Any:
254
244
  volume = await self.to_thread(self._odps_entry.get_volume, info.volume_name)
255
- if isinstance(volume, PartedVolume):
256
- byte_data = await self._read_parted_volume_data(
257
- volume, info.volume_path, "data"
258
- )
259
- elif isinstance(volume, ExternalVolume):
260
- byte_data = await self._read_external_volume_data(
261
- volume, info.volume_path, "data"
262
- )
245
+ if isinstance(volume, ExternalVolume):
246
+ return await self.to_thread(volume_fetch_func)
263
247
  else:
264
248
  raise NotImplementedError(f"Volume type {type(volume)} not supported")
265
- return pickle.loads(byte_data)
266
249
 
267
250
  async def fetch(
268
251
  self,
@@ -271,5 +254,5 @@ class ODPSVolumeFetcher(ToThreadMixin, ResultFetcher):
271
254
  indexes: List[Union[Integral, slice]],
272
255
  ) -> Any:
273
256
  if isinstance(tileable, (OBJECT_TYPE, TENSOR_TYPE)):
274
- return await self._fetch_object(info)
257
+ return await self._fetch_object(tileable, info, indexes)
275
258
  raise NotImplementedError(f"Fetching {type(tileable)} not implemented")
@@ -12,6 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ # retry consts
16
+ EMPTY_RESPONSE_RETRY_COUNT = 5
17
+
15
18
  # Restful Service
16
19
  RESTFUL_SESSION_INSECURE_SCHEME = "mf"
17
20
  RESTFUL_SESSION_SECURE_SCHEME = "mfs"
@@ -19,10 +19,16 @@ from dataclasses import dataclass
19
19
  from typing import Any, Dict, List, Tuple, Union
20
20
  from weakref import WeakSet
21
21
 
22
- from maxframe.core import ChunkType, TileableGraph, TileableType, enter_mode
22
+ from maxframe.core import (
23
+ ChunkType,
24
+ TileableGraph,
25
+ TileableType,
26
+ build_fetch,
27
+ enter_mode,
28
+ )
23
29
  from maxframe.core.operator import Fetch
24
30
  from maxframe.session import AbstractSession
25
- from maxframe.utils import build_fetch, copy_tileables
31
+ from maxframe.utils import copy_tileables
26
32
 
27
33
  logger = logging.getLogger(__name__)
28
34
 
@@ -18,15 +18,17 @@ import logging
18
18
  import time
19
19
  import weakref
20
20
  from numbers import Integral
21
- from typing import Dict, List, Mapping, Optional, Tuple, Union
21
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
22
22
  from urllib.parse import urlparse
23
23
 
24
24
  import numpy as np
25
25
  import pandas as pd
26
26
  from odps import ODPS
27
+ from odps import options as odps_options
27
28
 
28
29
  from maxframe.config import options
29
- from maxframe.core import Entity, TileableGraph, enter_mode
30
+ from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
31
+ from maxframe.core.operator import Fetch
30
32
  from maxframe.dataframe import read_odps_table
31
33
  from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
32
34
  from maxframe.dataframe.datasource import PandasDataSourceOperator
@@ -36,11 +38,18 @@ from maxframe.errors import (
36
38
  NoTaskServerResponseError,
37
39
  SessionAlreadyClosedError,
38
40
  )
39
- from maxframe.odpsio import ODPSTableIO, pandas_to_arrow, pandas_to_odps_schema
41
+ from maxframe.io.objects import get_object_io_handler
42
+ from maxframe.io.odpsio import (
43
+ ODPSTableIO,
44
+ ODPSVolumeWriter,
45
+ pandas_to_arrow,
46
+ pandas_to_odps_schema,
47
+ )
40
48
  from maxframe.protocol import (
41
49
  DagInfo,
42
50
  DagStatus,
43
51
  ODPSTableResultInfo,
52
+ ODPSVolumeResultInfo,
44
53
  ResultInfo,
45
54
  SessionInfo,
46
55
  )
@@ -51,8 +60,15 @@ from maxframe.session import (
51
60
  Profiling,
52
61
  Progress,
53
62
  )
63
+ from maxframe.tensor.datasource import ArrayDataSource
54
64
  from maxframe.typing_ import TileableType
55
- from maxframe.utils import ToThreadMixin, build_temp_table_name
65
+ from maxframe.utils import (
66
+ ToThreadMixin,
67
+ build_session_volume_name,
68
+ build_temp_table_name,
69
+ str_to_bool,
70
+ sync_pyodps_options,
71
+ )
56
72
 
57
73
  from ..clients.framedriver import FrameDriverClient
58
74
  from ..fetcher import get_fetcher_cls
@@ -63,6 +79,43 @@ logger = logging.getLogger(__name__)
63
79
 
64
80
 
65
81
  class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
82
+ def get_settings_to_upload(self) -> Dict[str, Any]:
83
+ sql_settings = (odps_options.sql.settings or {}).copy()
84
+ sql_settings.update(options.sql.settings or {})
85
+
86
+ quota_name = options.session.quota_name or getattr(
87
+ odps_options, "quota_name", None
88
+ )
89
+ lifecycle = options.session.table_lifecycle or odps_options.lifecycle
90
+ temp_lifecycle = (
91
+ options.session.temp_table_lifecycle or odps_options.temp_lifecycle
92
+ )
93
+
94
+ enable_schema = options.session.enable_schema
95
+ default_schema = options.session.default_schema
96
+ if hasattr(self, "_odps_entry"):
97
+ default_schema = default_schema or self._odps_entry.schema
98
+
99
+ # use flags in sql settings
100
+ if sql_settings.get("odps.default.schema"):
101
+ default_schema = sql_settings["odps.default.schema"]
102
+ if str_to_bool(
103
+ sql_settings.get("odps.namespace.schema") or "false"
104
+ ) or str_to_bool(
105
+ sql_settings.get("odps.sql.allow.namespace.schema") or "false"
106
+ ):
107
+ enable_schema = True
108
+
109
+ mf_settings = dict(options.to_dict(remote_only=True).items())
110
+ mf_settings["sql.settings"] = sql_settings
111
+ mf_settings["session.table_lifecycle"] = lifecycle
112
+ mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
113
+ mf_settings["session.quota_name"] = quota_name
114
+ if enable_schema is not None:
115
+ mf_settings["session.enable_schema"] = enable_schema
116
+ mf_settings["session.default_schema"] = default_schema or "default"
117
+ return mf_settings
118
+
66
119
  @abc.abstractmethod
67
120
  def create_session(self) -> SessionInfo:
68
121
  raise NotImplementedError
@@ -73,7 +126,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
73
126
 
74
127
  @abc.abstractmethod
75
128
  def submit_dag(
76
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
129
+ self,
130
+ dag: TileableGraph,
131
+ managed_input_infos: Dict[str, ResultInfo],
132
+ new_settings: Dict[str, Any] = None,
77
133
  ) -> DagInfo:
78
134
  raise NotImplementedError
79
135
 
@@ -127,6 +183,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
127
183
  self._tileable_to_infos = weakref.WeakKeyDictionary()
128
184
 
129
185
  self._caller = self._create_caller(odps_entry, address, **kwargs)
186
+ self._last_settings = None
130
187
 
131
188
  @classmethod
132
189
  def _create_caller(
@@ -136,18 +193,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
136
193
 
137
194
  async def _init(self, _address: str):
138
195
  session_info = await self.ensure_async_call(self._caller.create_session)
196
+ self._last_settings = self._caller.get_settings_to_upload()
139
197
  self._session_id = session_info.session_id
140
198
  await self._show_logview_address()
141
199
 
142
- def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
143
- if (
144
- not isinstance(t.op, PandasDataSourceOperator)
145
- or t.op.get_data() is None
146
- or t.inputs
147
- ):
148
- return None
149
-
150
- schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
200
+ def _upload_and_get_table_read_tileable(
201
+ self, t: TileableType
202
+ ) -> Optional[TileableType]:
203
+ table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
151
204
  if self._odps_entry.exist_table(table_meta.table_name):
152
205
  self._odps_entry.delete_table(
153
206
  table_meta.table_name, hints=options.sql.settings
@@ -155,7 +208,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
155
208
  table_name = build_temp_table_name(self.session_id, t.key)
156
209
  table_obj = self._odps_entry.create_table(
157
210
  table_name,
158
- schema,
211
+ table_schema,
159
212
  lifecycle=options.session.temp_table_lifecycle,
160
213
  hints=options.sql.settings,
161
214
  )
@@ -193,8 +246,30 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
193
246
  read_tileable.params = t.params
194
247
  return read_tileable.data
195
248
 
249
+ def _upload_and_get_vol_read_tileable(
250
+ self, t: TileableType
251
+ ) -> Optional[TileableType]:
252
+ vol_name = build_session_volume_name(self.session_id)
253
+ writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
254
+ io_handler = get_object_io_handler(t)
255
+ io_handler().write_object(writer, t, t.op.data)
256
+ return build_fetch(t).data
257
+
258
+ def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
259
+ if (
260
+ not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
261
+ or t.op.get_data() is None
262
+ or t.inputs
263
+ ):
264
+ return None
265
+ with sync_pyodps_options():
266
+ if isinstance(t.op, PandasDataSourceOperator):
267
+ return self._upload_and_get_table_read_tileable(t)
268
+ else:
269
+ return self._upload_and_get_vol_read_tileable(t)
270
+
196
271
  @enter_mode(kernel=True, build=True)
197
- def _scan_and_replace_pandas_sources(
272
+ def _scan_and_replace_local_sources(
198
273
  self, graph: TileableGraph
199
274
  ) -> Dict[TileableType, TileableType]:
200
275
  """Replaces Pandas data sources with temp table sources in the graph"""
@@ -215,7 +290,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
215
290
 
216
291
  for succ in successors:
217
292
  graph.add_edge(replaced, succ)
218
- succ.inputs = [replacements.get(t, t) for t in succ.inputs]
293
+ succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
219
294
 
220
295
  graph.results = [replacements.get(t, t) for t in graph.results]
221
296
  return replacements
@@ -223,16 +298,41 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
223
298
  @enter_mode(kernel=True, build=True)
224
299
  def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
225
300
  """Generate ResultInfo structs from generated temp tables"""
301
+ vol_name = build_session_volume_name(self.session_id)
302
+
226
303
  infos = dict()
227
304
  for t in tileables:
228
305
  key = t.key
229
- if not isinstance(t.op, DataFrameReadODPSTable):
230
- if not isinstance(t.inputs[0].op, DataFrameReadODPSTable):
231
- continue
232
- t = t.inputs[0]
233
- infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
306
+ if isinstance(t.op, DataFrameReadODPSTable):
307
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
308
+ else:
309
+ if isinstance(t.op, Fetch):
310
+ infos[key] = ODPSVolumeResultInfo(
311
+ volume_name=vol_name, volume_path=t.key
312
+ )
313
+ elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
314
+ t = t.inputs[0]
315
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
234
316
  return infos
235
317
 
318
+ def _get_diff_settings(self) -> Dict[str, Any]:
319
+ new_settings = self._caller.get_settings_to_upload()
320
+ if not self._last_settings: # pragma: no cover
321
+ self._last_settings = new_settings
322
+ return new_settings
323
+
324
+ update = dict()
325
+ for k in new_settings.keys():
326
+ old_item = self._last_settings.get(k)
327
+ new_item = new_settings.get(k)
328
+ try:
329
+ if old_item != new_item:
330
+ update[k] = new_item
331
+ except: # noqa: E722 # nosec # pylint: disable=bare-except
332
+ update[k] = new_item
333
+ self._last_settings = new_settings
334
+ return update
335
+
236
336
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
237
337
  tileables = [
238
338
  tileable.data if isinstance(tileable, Entity) else tileable
@@ -242,7 +342,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
242
342
  tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
243
343
  self, tileables, tileable_to_copied
244
344
  )
245
- source_replacements = self._scan_and_replace_pandas_sources(tileable_graph)
345
+ source_replacements = self._scan_and_replace_local_sources(tileable_graph)
246
346
 
247
347
  # we need to manage uploaded data sources with refcounting mechanism
248
348
  # as nodes in tileable_graph are copied, we need to use original nodes
@@ -252,7 +352,10 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
252
352
 
253
353
  replaced_infos = self._get_input_infos(list(source_replacements.values()))
254
354
  dag_info = await self.ensure_async_call(
255
- self._caller.submit_dag, tileable_graph, replaced_infos
355
+ self._caller.submit_dag,
356
+ tileable_graph,
357
+ replaced_infos,
358
+ self._get_diff_settings(),
256
359
  )
257
360
 
258
361
  await self._show_logview_address(dag_info.dag_id)
@@ -462,7 +565,8 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
462
565
  _client: FrameDriverClient
463
566
  _session_id: Optional[str]
464
567
 
465
- def __init__(self, client: FrameDriverClient):
568
+ def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
569
+ self._odps_entry = odps_entry
466
570
  self._client = client
467
571
  self._session_id = None
468
572
 
@@ -475,7 +579,10 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
475
579
  await self._client.delete_session(self._session_id)
476
580
 
477
581
  async def submit_dag(
478
- self, dag: TileableGraph, managed_input_infos: Dict[str, ResultInfo]
582
+ self,
583
+ dag: TileableGraph,
584
+ managed_input_infos: Dict[str, ResultInfo] = None,
585
+ new_settings: Dict[str, Any] = None,
479
586
  ) -> DagInfo:
480
587
  return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
481
588
 
@@ -515,7 +622,7 @@ class MaxFrameRestSession(MaxFrameSession):
515
622
 
516
623
  @classmethod
517
624
  def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
518
- return MaxFrameRestCaller(FrameDriverClient(address))
625
+ return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
519
626
 
520
627
 
521
628
  def register_session_schemes(overwrite: bool = False):
@@ -16,7 +16,7 @@ import base64
16
16
  import json
17
17
  import logging
18
18
  import time
19
- from typing import Dict, List, Optional, Type, Union
19
+ from typing import Any, Dict, List, Optional, Type, Union
20
20
 
21
21
  import msgpack
22
22
  from odps import ODPS
@@ -24,6 +24,12 @@ from odps import options as odps_options
24
24
  from odps.errors import parse_instance_error
25
25
  from odps.models import Instance, MaxFrameTask
26
26
 
27
+ try:
28
+ from odps.errors import EmptyTaskInfoError
29
+ except ImportError: # pragma: no cover
30
+ # todo remove when pyodps>=0.12.0 is enforced
31
+ EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
32
+
27
33
  from maxframe.config import options
28
34
  from maxframe.core import TileableGraph
29
35
  from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
@@ -36,6 +42,7 @@ except ImportError:
36
42
  mf_version = None
37
43
 
38
44
  from .consts import (
45
+ EMPTY_RESPONSE_RETRY_COUNT,
39
46
  MAXFRAME_DEFAULT_PROTOCOL,
40
47
  MAXFRAME_OUTPUT_JSON_FORMAT,
41
48
  MAXFRAME_OUTPUT_MAXFRAME_FORMAT,
@@ -92,6 +99,10 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
92
99
  self._nested = True
93
100
  self._instance = odps_entry.get_instance(nested_instance_id)
94
101
 
102
+ @property
103
+ def instance(self):
104
+ return self._instance
105
+
95
106
  def _deserial_task_info_result(
96
107
  self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
97
108
  ):
@@ -125,16 +136,8 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
125
136
  major_version=self._major_version,
126
137
  service_endpoint=self._odps_entry.endpoint,
127
138
  )
128
-
129
- # merge sql options
130
- sql_settings = (odps_options.sql.settings or {}).copy()
131
- sql_settings.update(options.sql.settings or {})
132
-
133
- mf_settings = dict(options.to_dict(remote_only=True).items())
134
- mf_settings["sql.settings"] = sql_settings
135
-
136
139
  mf_opts = {
137
- "odps.maxframe.settings": json.dumps(mf_settings),
140
+ "odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
138
141
  "odps.maxframe.output_format": self._output_format,
139
142
  }
140
143
  if mf_version:
@@ -189,18 +192,39 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
189
192
  interval = min(max_interval, interval * 2)
190
193
 
191
194
  def _put_task_info(self, method_name: str, json_data: dict):
192
- resp_data = self._instance.put_task_info(
193
- self._task_name, method_name, json.dumps(json_data)
194
- )
195
- if not resp_data:
196
- raise NoTaskServerResponseError(f"No response for request {method_name}")
197
- return resp_data
195
+ for trial in range(EMPTY_RESPONSE_RETRY_COUNT):
196
+ try:
197
+ return self._instance.put_task_info(
198
+ self._task_name,
199
+ method_name,
200
+ json.dumps(json_data),
201
+ raise_empty=True,
202
+ )
203
+ except TypeError: # pragma: no cover
204
+ # todo remove when pyodps>=0.12.0 is enforced
205
+ resp_data = self._instance.put_task_info(
206
+ self._task_name, method_name, json.dumps(json_data)
207
+ )
208
+ if resp_data:
209
+ return resp_data
210
+ else:
211
+ raise NoTaskServerResponseError(
212
+ f"No response for request {method_name}. "
213
+ f"Instance ID: {self._instance.id}"
214
+ )
215
+ except EmptyTaskInfoError as ex:
216
+ # retry when server returns HTTP 204, which is designed for retry
217
+ if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
218
+ raise NoTaskServerResponseError(
219
+ f"No response for request {method_name}. "
220
+ f"Instance ID: {self._instance.id}. "
221
+ f"Request ID: {ex.request_id}"
222
+ ) from None
223
+ time.sleep(0.5)
198
224
 
199
225
  def get_session(self) -> SessionInfo:
200
226
  req_data = {"output_format": self._output_format}
201
- serialized = self._instance.put_task_info(
202
- self._task_name, MAXFRAME_TASK_GET_SESSION_METHOD, json.dumps(req_data)
203
- )
227
+ serialized = self._put_task_info(MAXFRAME_TASK_GET_SESSION_METHOD, req_data)
204
228
  info: SessionInfo = self._deserial_task_info_result(serialized, SessionInfo)
205
229
  info.session_id = self._instance.id
206
230
  return info
@@ -217,13 +241,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
217
241
  self,
218
242
  dag: TileableGraph,
219
243
  managed_input_infos: Optional[Dict[str, ResultInfo]] = None,
244
+ new_settings: Dict[str, Any] = None,
220
245
  ) -> DagInfo:
246
+ new_settings_value = {
247
+ "odps.maxframe.settings": json.dumps(new_settings),
248
+ }
221
249
  req_data = {
222
250
  "protocol": MAXFRAME_DEFAULT_PROTOCOL,
223
251
  "dag": base64.b64encode(serialize_serializable(dag)).decode(),
224
252
  "managed_input_infos": base64.b64encode(
225
253
  serialize_serializable(managed_input_infos)
226
254
  ).decode(),
255
+ "new_settings": json.dumps(new_settings_value),
227
256
  "output_format": self._output_format,
228
257
  }
229
258
  res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
@@ -276,7 +305,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
276
305
  class MaxFrameTaskSession(MaxFrameSession):
277
306
  schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
278
307
 
279
- _instance: Instance
308
+ _caller: MaxFrameInstanceCaller
280
309
 
281
310
  @classmethod
282
311
  def _create_caller(
@@ -296,6 +325,15 @@ class MaxFrameTaskSession(MaxFrameSession):
296
325
  **kwargs,
297
326
  )
298
327
 
328
+ @property
329
+ def closed(self) -> bool:
330
+ if super().closed:
331
+ return True
332
+ if not self._caller or not self._caller.instance:
333
+ # session not initialized yet
334
+ return False
335
+ return self._caller.instance.is_terminated()
336
+
299
337
 
300
338
  def register_session_schemes(overwrite: bool = False):
301
339
  MaxFrameTaskSession.register_schemes(overwrite=overwrite)
@@ -22,7 +22,7 @@ from odps import ODPS
22
22
 
23
23
  import maxframe.dataframe as md
24
24
  from maxframe.config import options
25
- from maxframe.odpsio import ODPSTableIO
25
+ from maxframe.io.odpsio import ODPSTableIO
26
26
  from maxframe.protocol import ODPSTableResultInfo, ResultType
27
27
  from maxframe.tests.utils import tn
28
28
 
@@ -137,6 +137,15 @@ def test_simple_run_dataframe(start_mock_session):
137
137
  assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
138
138
 
139
139
 
140
+ def test_run_and_fetch_slice(start_mock_session):
141
+ pd_df = pd.DataFrame(np.random.rand(1000, 5), columns=list("ABCDE"))
142
+ df = md.DataFrame(pd_df)
143
+ result = df.execute()
144
+
145
+ sliced = result.head(10).fetch()
146
+ assert len(sliced) == 10
147
+
148
+
140
149
  def test_run_empty_table(start_mock_session):
141
150
  odps_entry = ODPS.from_environments()
142
151
 
@@ -189,7 +198,7 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
189
198
  table_name = build_temp_table_name(start_mock_session, "tmp_save")
190
199
  table_obj = odps_entry.get_table(table_name)
191
200
  try:
192
- md.to_odps_table(md.DataFrame(pd_df), table_obj).execute().fetch()
201
+ md.to_odps_table(md.DataFrame(pd_df), table_obj, lifecycle=1).execute().fetch()
193
202
  with table_obj.open_reader() as reader:
194
203
  result_df = reader.to_pandas()
195
204
  assert len(result_df) == 10
@@ -247,7 +256,22 @@ def test_run_and_fetch_series(start_mock_session):
247
256
  )
248
257
 
249
258
 
250
- def test_run_remote_success(start_mock_session):
259
+ def test_execute_with_tensor(oss_config, start_mock_session):
260
+ pd_df = pd.DataFrame(
261
+ {"angles": [0, 3, 4], "degrees": [360, 180, 360]},
262
+ index=["circle", "triangle", "rectangle"],
263
+ )
264
+ df = md.DataFrame(pd_df)
265
+
266
+ result = (df - [1, 2]).execute().fetch()
267
+ expected = pd_df - [1, 2]
268
+ # TODO: currently the record order in tensor reading from table is the index
269
+ # sorting order
270
+ expected.sort_index(axis=0, inplace=True)
271
+ pd.testing.assert_frame_equal(result, expected, check_like=True)
272
+
273
+
274
+ def test_run_remote_success(oss_config, start_mock_session):
251
275
  def func(a, b):
252
276
  return a + b
253
277
 
@@ -258,7 +282,7 @@ def test_run_remote_success(start_mock_session):
258
282
  assert result == 21
259
283
 
260
284
 
261
- def test_run_remote_error(start_mock_session):
285
+ def test_run_remote_error(oss_config, start_mock_session):
262
286
  def func():
263
287
  raise ValueError
264
288