maxframe 1.0.0rc3__cp310-cp310-win_amd64.whl → 1.1.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp310-win_amd64.pyd +0 -0
- maxframe/codegen.py +1 -0
- maxframe/config/config.py +16 -1
- maxframe/conftest.py +52 -14
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +26 -2
- maxframe/dataframe/datasource/read_odps_query.py +116 -28
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
- maxframe/dataframe/datastore/to_odps.py +7 -0
- maxframe/dataframe/extensions/__init__.py +8 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +314 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +23 -2
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/arrow.py +30 -2
- maxframe/io/odpsio/schema.py +28 -8
- maxframe/io/odpsio/tableio.py +55 -133
- maxframe/io/odpsio/tests/test_schema.py +40 -4
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +36 -6
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +3 -3
- maxframe/learn/contrib/xgboost/predict.py +8 -39
- maxframe/learn/contrib/xgboost/train.py +4 -3
- maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +10 -1
- maxframe/protocol.py +6 -1
- maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +24 -5
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +8 -1
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/concatenate.py +23 -20
- maxframe/tensor/merge/vstack.py +5 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +64 -14
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +28 -10
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/odps.py +104 -20
- maxframe_client/session/task.py +42 -26
- maxframe_client/session/tests/test_task.py +0 -4
- maxframe_client/tests/test_session.py +44 -12
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
maxframe_client/session/odps.py
CHANGED
|
@@ -14,16 +14,19 @@
|
|
|
14
14
|
|
|
15
15
|
import abc
|
|
16
16
|
import asyncio
|
|
17
|
+
import copy
|
|
17
18
|
import logging
|
|
18
19
|
import time
|
|
19
20
|
import weakref
|
|
20
21
|
from numbers import Integral
|
|
21
|
-
from typing import Dict, List, Mapping, Optional, Tuple, Union
|
|
22
|
+
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
|
22
23
|
from urllib.parse import urlparse
|
|
23
24
|
|
|
24
25
|
import numpy as np
|
|
25
26
|
import pandas as pd
|
|
26
27
|
from odps import ODPS
|
|
28
|
+
from odps import options as odps_options
|
|
29
|
+
from odps.console import in_ipython_frontend
|
|
27
30
|
|
|
28
31
|
from maxframe.config import options
|
|
29
32
|
from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
|
|
@@ -65,6 +68,8 @@ from maxframe.utils import (
|
|
|
65
68
|
ToThreadMixin,
|
|
66
69
|
build_session_volume_name,
|
|
67
70
|
build_temp_table_name,
|
|
71
|
+
str_to_bool,
|
|
72
|
+
sync_pyodps_options,
|
|
68
73
|
)
|
|
69
74
|
|
|
70
75
|
from ..clients.framedriver import FrameDriverClient
|
|
@@ -76,6 +81,45 @@ logger = logging.getLogger(__name__)
|
|
|
76
81
|
|
|
77
82
|
|
|
78
83
|
class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
84
|
+
def get_settings_to_upload(self) -> Dict[str, Any]:
|
|
85
|
+
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
86
|
+
sql_settings.update(options.sql.settings or {})
|
|
87
|
+
|
|
88
|
+
quota_name = options.session.quota_name or getattr(
|
|
89
|
+
odps_options, "quota_name", None
|
|
90
|
+
)
|
|
91
|
+
lifecycle = options.session.table_lifecycle or odps_options.lifecycle
|
|
92
|
+
temp_lifecycle = (
|
|
93
|
+
options.session.temp_table_lifecycle or odps_options.temp_lifecycle
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
enable_schema = options.session.enable_schema
|
|
97
|
+
default_schema = options.session.default_schema
|
|
98
|
+
if hasattr(self, "_odps_entry"):
|
|
99
|
+
default_schema = default_schema or self._odps_entry.schema
|
|
100
|
+
|
|
101
|
+
# use flags in sql settings
|
|
102
|
+
if sql_settings.get("odps.default.schema"):
|
|
103
|
+
default_schema = sql_settings["odps.default.schema"]
|
|
104
|
+
if str_to_bool(
|
|
105
|
+
sql_settings.get("odps.namespace.schema") or "false"
|
|
106
|
+
) or str_to_bool(
|
|
107
|
+
sql_settings.get("odps.sql.allow.namespace.schema") or "false"
|
|
108
|
+
):
|
|
109
|
+
enable_schema = True
|
|
110
|
+
|
|
111
|
+
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
112
|
+
mf_settings["sql.settings"] = sql_settings
|
|
113
|
+
mf_settings["session.table_lifecycle"] = lifecycle
|
|
114
|
+
mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
|
|
115
|
+
mf_settings["session.quota_name"] = quota_name
|
|
116
|
+
if enable_schema is not None:
|
|
117
|
+
mf_settings["session.enable_schema"] = enable_schema
|
|
118
|
+
if options.session.enable_high_availability is None:
|
|
119
|
+
mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
|
|
120
|
+
mf_settings["session.default_schema"] = default_schema or "default"
|
|
121
|
+
return mf_settings
|
|
122
|
+
|
|
79
123
|
@abc.abstractmethod
|
|
80
124
|
def create_session(self) -> SessionInfo:
|
|
81
125
|
raise NotImplementedError
|
|
@@ -86,7 +130,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
86
130
|
|
|
87
131
|
@abc.abstractmethod
|
|
88
132
|
def submit_dag(
|
|
89
|
-
self,
|
|
133
|
+
self,
|
|
134
|
+
dag: TileableGraph,
|
|
135
|
+
managed_input_infos: Dict[str, ResultInfo],
|
|
136
|
+
new_settings: Dict[str, Any] = None,
|
|
90
137
|
) -> DagInfo:
|
|
91
138
|
raise NotImplementedError
|
|
92
139
|
|
|
@@ -140,6 +187,9 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
140
187
|
self._tileable_to_infos = weakref.WeakKeyDictionary()
|
|
141
188
|
|
|
142
189
|
self._caller = self._create_caller(odps_entry, address, **kwargs)
|
|
190
|
+
self._last_settings = None
|
|
191
|
+
self._pull_interval = 1 if in_ipython_frontend() else 3
|
|
192
|
+
self._replace_internal_host = kwargs.get("replace_internal_host", True)
|
|
143
193
|
|
|
144
194
|
@classmethod
|
|
145
195
|
def _create_caller(
|
|
@@ -149,13 +199,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
149
199
|
|
|
150
200
|
async def _init(self, _address: str):
|
|
151
201
|
session_info = await self.ensure_async_call(self._caller.create_session)
|
|
202
|
+
self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
|
|
152
203
|
self._session_id = session_info.session_id
|
|
153
204
|
await self._show_logview_address()
|
|
154
205
|
|
|
155
206
|
def _upload_and_get_table_read_tileable(
|
|
156
207
|
self, t: TileableType
|
|
157
208
|
) -> Optional[TileableType]:
|
|
158
|
-
|
|
209
|
+
table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
|
|
159
210
|
if self._odps_entry.exist_table(table_meta.table_name):
|
|
160
211
|
self._odps_entry.delete_table(
|
|
161
212
|
table_meta.table_name, hints=options.sql.settings
|
|
@@ -163,7 +214,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
163
214
|
table_name = build_temp_table_name(self.session_id, t.key)
|
|
164
215
|
table_obj = self._odps_entry.create_table(
|
|
165
216
|
table_name,
|
|
166
|
-
|
|
217
|
+
table_schema,
|
|
167
218
|
lifecycle=options.session.temp_table_lifecycle,
|
|
168
219
|
hints=options.sql.settings,
|
|
169
220
|
)
|
|
@@ -205,7 +256,12 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
205
256
|
self, t: TileableType
|
|
206
257
|
) -> Optional[TileableType]:
|
|
207
258
|
vol_name = build_session_volume_name(self.session_id)
|
|
208
|
-
writer = ODPSVolumeWriter(
|
|
259
|
+
writer = ODPSVolumeWriter(
|
|
260
|
+
self._odps_entry,
|
|
261
|
+
vol_name,
|
|
262
|
+
t.key,
|
|
263
|
+
replace_internal_host=self._replace_internal_host,
|
|
264
|
+
)
|
|
209
265
|
io_handler = get_object_io_handler(t)
|
|
210
266
|
io_handler().write_object(writer, t, t.op.data)
|
|
211
267
|
return build_fetch(t).data
|
|
@@ -217,10 +273,11 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
217
273
|
or t.inputs
|
|
218
274
|
):
|
|
219
275
|
return None
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
276
|
+
with sync_pyodps_options():
|
|
277
|
+
if isinstance(t.op, PandasDataSourceOperator):
|
|
278
|
+
return self._upload_and_get_table_read_tileable(t)
|
|
279
|
+
else:
|
|
280
|
+
return self._upload_and_get_vol_read_tileable(t)
|
|
224
281
|
|
|
225
282
|
@enter_mode(kernel=True, build=True)
|
|
226
283
|
def _scan_and_replace_local_sources(
|
|
@@ -244,7 +301,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
244
301
|
|
|
245
302
|
for succ in successors:
|
|
246
303
|
graph.add_edge(replaced, succ)
|
|
247
|
-
succ.
|
|
304
|
+
succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
|
|
248
305
|
|
|
249
306
|
graph.results = [replacements.get(t, t) for t in graph.results]
|
|
250
307
|
return replacements
|
|
@@ -269,6 +326,24 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
269
326
|
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
270
327
|
return infos
|
|
271
328
|
|
|
329
|
+
def _get_diff_settings(self) -> Dict[str, Any]:
|
|
330
|
+
new_settings = self._caller.get_settings_to_upload()
|
|
331
|
+
if not self._last_settings: # pragma: no cover
|
|
332
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
333
|
+
return new_settings
|
|
334
|
+
|
|
335
|
+
update = dict()
|
|
336
|
+
for k in new_settings.keys():
|
|
337
|
+
old_item = self._last_settings.get(k)
|
|
338
|
+
new_item = new_settings.get(k)
|
|
339
|
+
try:
|
|
340
|
+
if old_item != new_item:
|
|
341
|
+
update[k] = new_item
|
|
342
|
+
except: # noqa: E722 # nosec # pylint: disable=bare-except
|
|
343
|
+
update[k] = new_item
|
|
344
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
345
|
+
return update
|
|
346
|
+
|
|
272
347
|
async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
|
|
273
348
|
tileables = [
|
|
274
349
|
tileable.data if isinstance(tileable, Entity) else tileable
|
|
@@ -288,7 +363,10 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
288
363
|
|
|
289
364
|
replaced_infos = self._get_input_infos(list(source_replacements.values()))
|
|
290
365
|
dag_info = await self.ensure_async_call(
|
|
291
|
-
self._caller.submit_dag,
|
|
366
|
+
self._caller.submit_dag,
|
|
367
|
+
tileable_graph,
|
|
368
|
+
replaced_infos,
|
|
369
|
+
self._get_diff_settings(),
|
|
292
370
|
)
|
|
293
371
|
|
|
294
372
|
await self._show_logview_address(dag_info.dag_id)
|
|
@@ -312,18 +390,18 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
312
390
|
start_time = time.time()
|
|
313
391
|
session_id = dag_info.session_id
|
|
314
392
|
dag_id = dag_info.dag_id
|
|
315
|
-
wait_timeout = 10
|
|
316
393
|
server_no_response_time = None
|
|
317
394
|
with enter_mode(build=True, kernel=True):
|
|
318
395
|
key_to_tileables = {t.key: t for t in tileables}
|
|
319
|
-
|
|
396
|
+
timeout_val = 0.1
|
|
320
397
|
try:
|
|
321
398
|
while True:
|
|
322
399
|
elapsed_time = time.time() - start_time
|
|
400
|
+
next_timeout_val = min(timeout_val * 2, self._pull_interval)
|
|
323
401
|
timeout_val = (
|
|
324
|
-
min(self.timeout - elapsed_time,
|
|
402
|
+
min(self.timeout - elapsed_time, next_timeout_val)
|
|
325
403
|
if self.timeout
|
|
326
|
-
else
|
|
404
|
+
else next_timeout_val
|
|
327
405
|
)
|
|
328
406
|
if timeout_val <= 0:
|
|
329
407
|
raise TimeoutError("Running DAG timed out")
|
|
@@ -498,7 +576,8 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
498
576
|
_client: FrameDriverClient
|
|
499
577
|
_session_id: Optional[str]
|
|
500
578
|
|
|
501
|
-
def __init__(self, client: FrameDriverClient):
|
|
579
|
+
def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
|
|
580
|
+
self._odps_entry = odps_entry
|
|
502
581
|
self._client = client
|
|
503
582
|
self._session_id = None
|
|
504
583
|
|
|
@@ -511,9 +590,14 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
511
590
|
await self._client.delete_session(self._session_id)
|
|
512
591
|
|
|
513
592
|
async def submit_dag(
|
|
514
|
-
self,
|
|
593
|
+
self,
|
|
594
|
+
dag: TileableGraph,
|
|
595
|
+
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
596
|
+
new_settings: Dict[str, Any] = None,
|
|
515
597
|
) -> DagInfo:
|
|
516
|
-
return await self._client.submit_dag(
|
|
598
|
+
return await self._client.submit_dag(
|
|
599
|
+
self._session_id, dag, managed_input_infos, new_settings=new_settings
|
|
600
|
+
)
|
|
517
601
|
|
|
518
602
|
async def get_dag_info(self, dag_id: str) -> DagInfo:
|
|
519
603
|
return await self._client.get_dag_info(self._session_id, dag_id)
|
|
@@ -546,12 +630,12 @@ class MaxFrameRestSession(MaxFrameSession):
|
|
|
546
630
|
real_endpoint = address.replace(f"{parsed_endpoint.scheme}://", f"{scheme}://")
|
|
547
631
|
|
|
548
632
|
super().__init__(
|
|
549
|
-
real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout
|
|
633
|
+
real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout, **kwargs
|
|
550
634
|
)
|
|
551
635
|
|
|
552
636
|
@classmethod
|
|
553
637
|
def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
|
|
554
|
-
return MaxFrameRestCaller(FrameDriverClient(address))
|
|
638
|
+
return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
|
|
555
639
|
|
|
556
640
|
|
|
557
641
|
def register_session_schemes(overwrite: bool = False):
|
maxframe_client/session/task.py
CHANGED
|
@@ -16,12 +16,12 @@ import base64
|
|
|
16
16
|
import json
|
|
17
17
|
import logging
|
|
18
18
|
import time
|
|
19
|
-
from typing import Dict, List, Optional, Type, Union
|
|
19
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
20
20
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
23
23
|
from odps import options as odps_options
|
|
24
|
-
from odps.errors import parse_instance_error
|
|
24
|
+
from odps.errors import EmptyTaskInfoError, parse_instance_error
|
|
25
25
|
from odps.models import Instance, MaxFrameTask
|
|
26
26
|
|
|
27
27
|
from maxframe.config import options
|
|
@@ -36,6 +36,7 @@ except ImportError:
|
|
|
36
36
|
mf_version = None
|
|
37
37
|
|
|
38
38
|
from .consts import (
|
|
39
|
+
EMPTY_RESPONSE_RETRY_COUNT,
|
|
39
40
|
MAXFRAME_DEFAULT_PROTOCOL,
|
|
40
41
|
MAXFRAME_OUTPUT_JSON_FORMAT,
|
|
41
42
|
MAXFRAME_OUTPUT_MAXFRAME_FORMAT,
|
|
@@ -92,6 +93,10 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
92
93
|
self._nested = True
|
|
93
94
|
self._instance = odps_entry.get_instance(nested_instance_id)
|
|
94
95
|
|
|
96
|
+
@property
|
|
97
|
+
def instance(self):
|
|
98
|
+
return self._instance
|
|
99
|
+
|
|
95
100
|
def _deserial_task_info_result(
|
|
96
101
|
self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
|
|
97
102
|
):
|
|
@@ -120,21 +125,9 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
120
125
|
)
|
|
121
126
|
|
|
122
127
|
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
123
|
-
task = MaxFrameTask(
|
|
124
|
-
name=self._task_name,
|
|
125
|
-
major_version=self._major_version,
|
|
126
|
-
service_endpoint=self._odps_entry.endpoint,
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
# merge sql options
|
|
130
|
-
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
131
|
-
sql_settings.update(options.sql.settings or {})
|
|
132
|
-
|
|
133
|
-
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
134
|
-
mf_settings["sql.settings"] = sql_settings
|
|
135
|
-
|
|
128
|
+
task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
|
|
136
129
|
mf_opts = {
|
|
137
|
-
"odps.maxframe.settings": json.dumps(
|
|
130
|
+
"odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
|
|
138
131
|
"odps.maxframe.output_format": self._output_format,
|
|
139
132
|
}
|
|
140
133
|
if mf_version:
|
|
@@ -189,18 +182,27 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
189
182
|
interval = min(max_interval, interval * 2)
|
|
190
183
|
|
|
191
184
|
def _put_task_info(self, method_name: str, json_data: dict):
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
185
|
+
for trial in range(EMPTY_RESPONSE_RETRY_COUNT):
|
|
186
|
+
try:
|
|
187
|
+
return self._instance.put_task_info(
|
|
188
|
+
self._task_name,
|
|
189
|
+
method_name,
|
|
190
|
+
json.dumps(json_data),
|
|
191
|
+
raise_empty=True,
|
|
192
|
+
)
|
|
193
|
+
except EmptyTaskInfoError as ex:
|
|
194
|
+
# retry when server returns HTTP 204, which is designed for retry
|
|
195
|
+
if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
|
|
196
|
+
raise NoTaskServerResponseError(
|
|
197
|
+
f"No response for request {method_name}. "
|
|
198
|
+
f"Instance ID: {self._instance.id}. "
|
|
199
|
+
f"Request ID: {ex.request_id}"
|
|
200
|
+
) from None
|
|
201
|
+
time.sleep(0.5)
|
|
198
202
|
|
|
199
203
|
def get_session(self) -> SessionInfo:
|
|
200
204
|
req_data = {"output_format": self._output_format}
|
|
201
|
-
serialized = self.
|
|
202
|
-
self._task_name, MAXFRAME_TASK_GET_SESSION_METHOD, json.dumps(req_data)
|
|
203
|
-
)
|
|
205
|
+
serialized = self._put_task_info(MAXFRAME_TASK_GET_SESSION_METHOD, req_data)
|
|
204
206
|
info: SessionInfo = self._deserial_task_info_result(serialized, SessionInfo)
|
|
205
207
|
info.session_id = self._instance.id
|
|
206
208
|
return info
|
|
@@ -217,13 +219,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
217
219
|
self,
|
|
218
220
|
dag: TileableGraph,
|
|
219
221
|
managed_input_infos: Optional[Dict[str, ResultInfo]] = None,
|
|
222
|
+
new_settings: Dict[str, Any] = None,
|
|
220
223
|
) -> DagInfo:
|
|
224
|
+
new_settings_value = {
|
|
225
|
+
"odps.maxframe.settings": json.dumps(new_settings),
|
|
226
|
+
}
|
|
221
227
|
req_data = {
|
|
222
228
|
"protocol": MAXFRAME_DEFAULT_PROTOCOL,
|
|
223
229
|
"dag": base64.b64encode(serialize_serializable(dag)).decode(),
|
|
224
230
|
"managed_input_infos": base64.b64encode(
|
|
225
231
|
serialize_serializable(managed_input_infos)
|
|
226
232
|
).decode(),
|
|
233
|
+
"new_settings": json.dumps(new_settings_value),
|
|
227
234
|
"output_format": self._output_format,
|
|
228
235
|
}
|
|
229
236
|
res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
|
|
@@ -276,7 +283,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
276
283
|
class MaxFrameTaskSession(MaxFrameSession):
|
|
277
284
|
schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
|
|
278
285
|
|
|
279
|
-
|
|
286
|
+
_caller: MaxFrameInstanceCaller
|
|
280
287
|
|
|
281
288
|
@classmethod
|
|
282
289
|
def _create_caller(
|
|
@@ -296,6 +303,15 @@ class MaxFrameTaskSession(MaxFrameSession):
|
|
|
296
303
|
**kwargs,
|
|
297
304
|
)
|
|
298
305
|
|
|
306
|
+
@property
|
|
307
|
+
def closed(self) -> bool:
|
|
308
|
+
if super().closed:
|
|
309
|
+
return True
|
|
310
|
+
if not self._caller or not self._caller.instance:
|
|
311
|
+
# session not initialized yet
|
|
312
|
+
return False
|
|
313
|
+
return self._caller.instance.is_terminated()
|
|
314
|
+
|
|
299
315
|
|
|
300
316
|
def register_session_schemes(overwrite: bool = False):
|
|
301
317
|
MaxFrameTaskSession.register_schemes(overwrite=overwrite)
|
|
@@ -53,10 +53,6 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
53
53
|
assert property_node.find("Name").text == "settings"
|
|
54
54
|
setting_dict = json.loads(property_node.find("Value").text)
|
|
55
55
|
assert setting_dict["odps.task.major.version"] == "test_version"
|
|
56
|
-
assert (
|
|
57
|
-
setting_dict["odps.service.endpoint"]
|
|
58
|
-
== "http://100.69.248.78:8002/odps_dailyrunnew"
|
|
59
|
-
)
|
|
60
56
|
|
|
61
57
|
assert setting_dict["odps.maxframe.output_format"] == "json"
|
|
62
58
|
maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
|
-
from typing import Dict
|
|
16
|
+
from typing import Any, Dict
|
|
17
17
|
|
|
18
18
|
import mock
|
|
19
19
|
import numpy as np
|
|
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
|
|
|
31
31
|
from maxframe.protocol import ResultInfo
|
|
32
32
|
from maxframe.serialization import RemoteException
|
|
33
33
|
from maxframe.session import new_session
|
|
34
|
-
from maxframe.tests.utils import tn
|
|
34
|
+
from maxframe.tests.utils import ensure_table_deleted, tn
|
|
35
35
|
from maxframe.utils import build_temp_table_name
|
|
36
36
|
from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
|
|
37
37
|
framedriver_app,
|
|
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
86
86
|
session_id: str,
|
|
87
87
|
dag: TileableGraph,
|
|
88
88
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
89
|
+
new_settings: Dict[str, Any] = None,
|
|
89
90
|
):
|
|
90
91
|
assert len(dag) == 2
|
|
91
|
-
return await original_submit_dag(
|
|
92
|
+
return await original_submit_dag(
|
|
93
|
+
self, session_id, dag, managed_input_infos, new_settings
|
|
94
|
+
)
|
|
92
95
|
|
|
93
96
|
no_task_server_raised = False
|
|
94
97
|
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
@@ -130,11 +133,19 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
130
133
|
)
|
|
131
134
|
assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
132
135
|
del df
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
|
+
ensure_table_deleted(
|
|
137
|
+
odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
138
|
)
|
|
137
|
-
|
|
139
|
+
ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_run_and_fetch_slice(start_mock_session):
|
|
143
|
+
pd_df = pd.DataFrame(np.random.rand(1000, 5), columns=list("ABCDE"))
|
|
144
|
+
df = md.DataFrame(pd_df)
|
|
145
|
+
result = df.execute()
|
|
146
|
+
|
|
147
|
+
sliced = result.head(10).fetch()
|
|
148
|
+
assert len(sliced) == 10
|
|
138
149
|
|
|
139
150
|
|
|
140
151
|
def test_run_empty_table(start_mock_session):
|
|
@@ -157,6 +168,25 @@ def test_run_empty_table(start_mock_session):
|
|
|
157
168
|
empty_table.drop()
|
|
158
169
|
|
|
159
170
|
|
|
171
|
+
def test_run_odps_query_without_schema(start_mock_session):
|
|
172
|
+
odps_entry = ODPS.from_environments()
|
|
173
|
+
|
|
174
|
+
table_name = tn("test_query_without_schema")
|
|
175
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
176
|
+
test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
|
|
177
|
+
|
|
178
|
+
with test_table.open_writer() as writer:
|
|
179
|
+
writer.write([123, 456])
|
|
180
|
+
|
|
181
|
+
df = md.read_odps_query(
|
|
182
|
+
f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
|
|
183
|
+
)
|
|
184
|
+
executed = df.execute().fetch()
|
|
185
|
+
assert len(executed.dtypes) == 3
|
|
186
|
+
|
|
187
|
+
test_table.drop()
|
|
188
|
+
|
|
189
|
+
|
|
160
190
|
def test_run_dataframe_with_pd_source(start_mock_session):
|
|
161
191
|
odps_entry = ODPS.from_environments()
|
|
162
192
|
|
|
@@ -189,7 +219,7 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
|
|
|
189
219
|
table_name = build_temp_table_name(start_mock_session, "tmp_save")
|
|
190
220
|
table_obj = odps_entry.get_table(table_name)
|
|
191
221
|
try:
|
|
192
|
-
md.to_odps_table(md.DataFrame(pd_df), table_obj).execute().fetch()
|
|
222
|
+
md.to_odps_table(md.DataFrame(pd_df), table_obj, lifecycle=1).execute().fetch()
|
|
193
223
|
with table_obj.open_reader() as reader:
|
|
194
224
|
result_df = reader.to_pandas()
|
|
195
225
|
assert len(result_df) == 10
|
|
@@ -237,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
|
|
|
237
267
|
pd.testing.assert_series_equal(pd_result, result)
|
|
238
268
|
|
|
239
269
|
del s1
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
build_temp_table_name(start_mock_session, src_key)
|
|
270
|
+
ensure_table_deleted(
|
|
271
|
+
odps_entry, build_temp_table_name(start_mock_session, src_key)
|
|
243
272
|
)
|
|
244
273
|
finally:
|
|
245
274
|
odps_entry.delete_table(
|
|
@@ -256,7 +285,10 @@ def test_execute_with_tensor(oss_config, start_mock_session):
|
|
|
256
285
|
|
|
257
286
|
result = (df - [1, 2]).execute().fetch()
|
|
258
287
|
expected = pd_df - [1, 2]
|
|
259
|
-
|
|
288
|
+
# TODO: currently the record order in tensor reading from table is the index
|
|
289
|
+
# sorting order
|
|
290
|
+
expected.sort_index(axis=0, inplace=True)
|
|
291
|
+
pd.testing.assert_frame_equal(result, expected, check_like=True)
|
|
260
292
|
|
|
261
293
|
|
|
262
294
|
def test_run_remote_success(oss_config, start_mock_session):
|
|
File without changes
|