maxframe 1.0.0rc3__cp37-cp37m-win_amd64.whl → 1.0.0rc4__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp37-win_amd64.pyd +0 -0
- maxframe/codegen.py +1 -0
- maxframe/config/config.py +13 -1
- maxframe/conftest.py +43 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/core.py +2 -0
- maxframe/dataframe/datasource/read_odps_query.py +66 -7
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
- maxframe/dataframe/datastore/to_odps.py +7 -0
- maxframe/dataframe/extensions/__init__.py +3 -0
- maxframe/dataframe/extensions/flatmap.py +326 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/misc/drop_duplicates.py +18 -1
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/io/odpsio/schema.py +5 -3
- maxframe/io/odpsio/tableio.py +44 -38
- maxframe/io/odpsio/tests/test_schema.py +0 -4
- maxframe/io/odpsio/volumeio.py +9 -3
- maxframe/learn/contrib/__init__.py +2 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/xgboost/classifier.py +3 -3
- maxframe/learn/contrib/xgboost/predict.py +8 -39
- maxframe/learn/contrib/xgboost/train.py +4 -3
- maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
- maxframe/opcodes.py +3 -0
- maxframe/protocol.py +6 -1
- maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
- maxframe/session.py +9 -2
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/concatenate.py +23 -20
- maxframe/tensor/merge/vstack.py +5 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/utils.py +34 -12
- {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/METADATA +1 -1
- {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +57 -52
- maxframe_client/fetcher.py +10 -8
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/odps.py +84 -13
- maxframe_client/session/task.py +58 -20
- maxframe_client/tests/test_session.py +14 -2
- {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +0 -0
- {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
maxframe_client/session/odps.py
CHANGED
|
@@ -18,12 +18,13 @@ import logging
|
|
|
18
18
|
import time
|
|
19
19
|
import weakref
|
|
20
20
|
from numbers import Integral
|
|
21
|
-
from typing import Dict, List, Mapping, Optional, Tuple, Union
|
|
21
|
+
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
|
22
22
|
from urllib.parse import urlparse
|
|
23
23
|
|
|
24
24
|
import numpy as np
|
|
25
25
|
import pandas as pd
|
|
26
26
|
from odps import ODPS
|
|
27
|
+
from odps import options as odps_options
|
|
27
28
|
|
|
28
29
|
from maxframe.config import options
|
|
29
30
|
from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
|
|
@@ -65,6 +66,8 @@ from maxframe.utils import (
|
|
|
65
66
|
ToThreadMixin,
|
|
66
67
|
build_session_volume_name,
|
|
67
68
|
build_temp_table_name,
|
|
69
|
+
str_to_bool,
|
|
70
|
+
sync_pyodps_options,
|
|
68
71
|
)
|
|
69
72
|
|
|
70
73
|
from ..clients.framedriver import FrameDriverClient
|
|
@@ -76,6 +79,43 @@ logger = logging.getLogger(__name__)
|
|
|
76
79
|
|
|
77
80
|
|
|
78
81
|
class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
82
|
+
def get_settings_to_upload(self) -> Dict[str, Any]:
|
|
83
|
+
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
84
|
+
sql_settings.update(options.sql.settings or {})
|
|
85
|
+
|
|
86
|
+
quota_name = options.session.quota_name or getattr(
|
|
87
|
+
odps_options, "quota_name", None
|
|
88
|
+
)
|
|
89
|
+
lifecycle = options.session.table_lifecycle or odps_options.lifecycle
|
|
90
|
+
temp_lifecycle = (
|
|
91
|
+
options.session.temp_table_lifecycle or odps_options.temp_lifecycle
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
enable_schema = options.session.enable_schema
|
|
95
|
+
default_schema = options.session.default_schema
|
|
96
|
+
if hasattr(self, "_odps_entry"):
|
|
97
|
+
default_schema = default_schema or self._odps_entry.schema
|
|
98
|
+
|
|
99
|
+
# use flags in sql settings
|
|
100
|
+
if sql_settings.get("odps.default.schema"):
|
|
101
|
+
default_schema = sql_settings["odps.default.schema"]
|
|
102
|
+
if str_to_bool(
|
|
103
|
+
sql_settings.get("odps.namespace.schema") or "false"
|
|
104
|
+
) or str_to_bool(
|
|
105
|
+
sql_settings.get("odps.sql.allow.namespace.schema") or "false"
|
|
106
|
+
):
|
|
107
|
+
enable_schema = True
|
|
108
|
+
|
|
109
|
+
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
110
|
+
mf_settings["sql.settings"] = sql_settings
|
|
111
|
+
mf_settings["session.table_lifecycle"] = lifecycle
|
|
112
|
+
mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
|
|
113
|
+
mf_settings["session.quota_name"] = quota_name
|
|
114
|
+
if enable_schema is not None:
|
|
115
|
+
mf_settings["session.enable_schema"] = enable_schema
|
|
116
|
+
mf_settings["session.default_schema"] = default_schema or "default"
|
|
117
|
+
return mf_settings
|
|
118
|
+
|
|
79
119
|
@abc.abstractmethod
|
|
80
120
|
def create_session(self) -> SessionInfo:
|
|
81
121
|
raise NotImplementedError
|
|
@@ -86,7 +126,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
86
126
|
|
|
87
127
|
@abc.abstractmethod
|
|
88
128
|
def submit_dag(
|
|
89
|
-
self,
|
|
129
|
+
self,
|
|
130
|
+
dag: TileableGraph,
|
|
131
|
+
managed_input_infos: Dict[str, ResultInfo],
|
|
132
|
+
new_settings: Dict[str, Any] = None,
|
|
90
133
|
) -> DagInfo:
|
|
91
134
|
raise NotImplementedError
|
|
92
135
|
|
|
@@ -140,6 +183,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
140
183
|
self._tileable_to_infos = weakref.WeakKeyDictionary()
|
|
141
184
|
|
|
142
185
|
self._caller = self._create_caller(odps_entry, address, **kwargs)
|
|
186
|
+
self._last_settings = None
|
|
143
187
|
|
|
144
188
|
@classmethod
|
|
145
189
|
def _create_caller(
|
|
@@ -149,13 +193,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
149
193
|
|
|
150
194
|
async def _init(self, _address: str):
|
|
151
195
|
session_info = await self.ensure_async_call(self._caller.create_session)
|
|
196
|
+
self._last_settings = self._caller.get_settings_to_upload()
|
|
152
197
|
self._session_id = session_info.session_id
|
|
153
198
|
await self._show_logview_address()
|
|
154
199
|
|
|
155
200
|
def _upload_and_get_table_read_tileable(
|
|
156
201
|
self, t: TileableType
|
|
157
202
|
) -> Optional[TileableType]:
|
|
158
|
-
|
|
203
|
+
table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
|
|
159
204
|
if self._odps_entry.exist_table(table_meta.table_name):
|
|
160
205
|
self._odps_entry.delete_table(
|
|
161
206
|
table_meta.table_name, hints=options.sql.settings
|
|
@@ -163,7 +208,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
163
208
|
table_name = build_temp_table_name(self.session_id, t.key)
|
|
164
209
|
table_obj = self._odps_entry.create_table(
|
|
165
210
|
table_name,
|
|
166
|
-
|
|
211
|
+
table_schema,
|
|
167
212
|
lifecycle=options.session.temp_table_lifecycle,
|
|
168
213
|
hints=options.sql.settings,
|
|
169
214
|
)
|
|
@@ -217,10 +262,11 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
217
262
|
or t.inputs
|
|
218
263
|
):
|
|
219
264
|
return None
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
265
|
+
with sync_pyodps_options():
|
|
266
|
+
if isinstance(t.op, PandasDataSourceOperator):
|
|
267
|
+
return self._upload_and_get_table_read_tileable(t)
|
|
268
|
+
else:
|
|
269
|
+
return self._upload_and_get_vol_read_tileable(t)
|
|
224
270
|
|
|
225
271
|
@enter_mode(kernel=True, build=True)
|
|
226
272
|
def _scan_and_replace_local_sources(
|
|
@@ -244,7 +290,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
244
290
|
|
|
245
291
|
for succ in successors:
|
|
246
292
|
graph.add_edge(replaced, succ)
|
|
247
|
-
succ.
|
|
293
|
+
succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
|
|
248
294
|
|
|
249
295
|
graph.results = [replacements.get(t, t) for t in graph.results]
|
|
250
296
|
return replacements
|
|
@@ -269,6 +315,24 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
269
315
|
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
270
316
|
return infos
|
|
271
317
|
|
|
318
|
+
def _get_diff_settings(self) -> Dict[str, Any]:
|
|
319
|
+
new_settings = self._caller.get_settings_to_upload()
|
|
320
|
+
if not self._last_settings: # pragma: no cover
|
|
321
|
+
self._last_settings = new_settings
|
|
322
|
+
return new_settings
|
|
323
|
+
|
|
324
|
+
update = dict()
|
|
325
|
+
for k in new_settings.keys():
|
|
326
|
+
old_item = self._last_settings.get(k)
|
|
327
|
+
new_item = new_settings.get(k)
|
|
328
|
+
try:
|
|
329
|
+
if old_item != new_item:
|
|
330
|
+
update[k] = new_item
|
|
331
|
+
except: # noqa: E722 # nosec # pylint: disable=bare-except
|
|
332
|
+
update[k] = new_item
|
|
333
|
+
self._last_settings = new_settings
|
|
334
|
+
return update
|
|
335
|
+
|
|
272
336
|
async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
|
|
273
337
|
tileables = [
|
|
274
338
|
tileable.data if isinstance(tileable, Entity) else tileable
|
|
@@ -288,7 +352,10 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
288
352
|
|
|
289
353
|
replaced_infos = self._get_input_infos(list(source_replacements.values()))
|
|
290
354
|
dag_info = await self.ensure_async_call(
|
|
291
|
-
self._caller.submit_dag,
|
|
355
|
+
self._caller.submit_dag,
|
|
356
|
+
tileable_graph,
|
|
357
|
+
replaced_infos,
|
|
358
|
+
self._get_diff_settings(),
|
|
292
359
|
)
|
|
293
360
|
|
|
294
361
|
await self._show_logview_address(dag_info.dag_id)
|
|
@@ -498,7 +565,8 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
498
565
|
_client: FrameDriverClient
|
|
499
566
|
_session_id: Optional[str]
|
|
500
567
|
|
|
501
|
-
def __init__(self, client: FrameDriverClient):
|
|
568
|
+
def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
|
|
569
|
+
self._odps_entry = odps_entry
|
|
502
570
|
self._client = client
|
|
503
571
|
self._session_id = None
|
|
504
572
|
|
|
@@ -511,7 +579,10 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
511
579
|
await self._client.delete_session(self._session_id)
|
|
512
580
|
|
|
513
581
|
async def submit_dag(
|
|
514
|
-
self,
|
|
582
|
+
self,
|
|
583
|
+
dag: TileableGraph,
|
|
584
|
+
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
585
|
+
new_settings: Dict[str, Any] = None,
|
|
515
586
|
) -> DagInfo:
|
|
516
587
|
return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
|
|
517
588
|
|
|
@@ -551,7 +622,7 @@ class MaxFrameRestSession(MaxFrameSession):
|
|
|
551
622
|
|
|
552
623
|
@classmethod
|
|
553
624
|
def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
|
|
554
|
-
return MaxFrameRestCaller(FrameDriverClient(address))
|
|
625
|
+
return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
|
|
555
626
|
|
|
556
627
|
|
|
557
628
|
def register_session_schemes(overwrite: bool = False):
|
maxframe_client/session/task.py
CHANGED
|
@@ -16,7 +16,7 @@ import base64
|
|
|
16
16
|
import json
|
|
17
17
|
import logging
|
|
18
18
|
import time
|
|
19
|
-
from typing import Dict, List, Optional, Type, Union
|
|
19
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
20
20
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
@@ -24,6 +24,12 @@ from odps import options as odps_options
|
|
|
24
24
|
from odps.errors import parse_instance_error
|
|
25
25
|
from odps.models import Instance, MaxFrameTask
|
|
26
26
|
|
|
27
|
+
try:
|
|
28
|
+
from odps.errors import EmptyTaskInfoError
|
|
29
|
+
except ImportError: # pragma: no cover
|
|
30
|
+
# todo remove when pyodps>=0.12.0 is enforced
|
|
31
|
+
EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
|
|
32
|
+
|
|
27
33
|
from maxframe.config import options
|
|
28
34
|
from maxframe.core import TileableGraph
|
|
29
35
|
from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
|
|
@@ -36,6 +42,7 @@ except ImportError:
|
|
|
36
42
|
mf_version = None
|
|
37
43
|
|
|
38
44
|
from .consts import (
|
|
45
|
+
EMPTY_RESPONSE_RETRY_COUNT,
|
|
39
46
|
MAXFRAME_DEFAULT_PROTOCOL,
|
|
40
47
|
MAXFRAME_OUTPUT_JSON_FORMAT,
|
|
41
48
|
MAXFRAME_OUTPUT_MAXFRAME_FORMAT,
|
|
@@ -92,6 +99,10 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
92
99
|
self._nested = True
|
|
93
100
|
self._instance = odps_entry.get_instance(nested_instance_id)
|
|
94
101
|
|
|
102
|
+
@property
|
|
103
|
+
def instance(self):
|
|
104
|
+
return self._instance
|
|
105
|
+
|
|
95
106
|
def _deserial_task_info_result(
|
|
96
107
|
self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
|
|
97
108
|
):
|
|
@@ -125,16 +136,8 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
125
136
|
major_version=self._major_version,
|
|
126
137
|
service_endpoint=self._odps_entry.endpoint,
|
|
127
138
|
)
|
|
128
|
-
|
|
129
|
-
# merge sql options
|
|
130
|
-
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
131
|
-
sql_settings.update(options.sql.settings or {})
|
|
132
|
-
|
|
133
|
-
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
134
|
-
mf_settings["sql.settings"] = sql_settings
|
|
135
|
-
|
|
136
139
|
mf_opts = {
|
|
137
|
-
"odps.maxframe.settings": json.dumps(
|
|
140
|
+
"odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
|
|
138
141
|
"odps.maxframe.output_format": self._output_format,
|
|
139
142
|
}
|
|
140
143
|
if mf_version:
|
|
@@ -189,18 +192,39 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
189
192
|
interval = min(max_interval, interval * 2)
|
|
190
193
|
|
|
191
194
|
def _put_task_info(self, method_name: str, json_data: dict):
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
195
|
+
for trial in range(EMPTY_RESPONSE_RETRY_COUNT):
|
|
196
|
+
try:
|
|
197
|
+
return self._instance.put_task_info(
|
|
198
|
+
self._task_name,
|
|
199
|
+
method_name,
|
|
200
|
+
json.dumps(json_data),
|
|
201
|
+
raise_empty=True,
|
|
202
|
+
)
|
|
203
|
+
except TypeError: # pragma: no cover
|
|
204
|
+
# todo remove when pyodps>=0.12.0 is enforced
|
|
205
|
+
resp_data = self._instance.put_task_info(
|
|
206
|
+
self._task_name, method_name, json.dumps(json_data)
|
|
207
|
+
)
|
|
208
|
+
if resp_data:
|
|
209
|
+
return resp_data
|
|
210
|
+
else:
|
|
211
|
+
raise NoTaskServerResponseError(
|
|
212
|
+
f"No response for request {method_name}. "
|
|
213
|
+
f"Instance ID: {self._instance.id}"
|
|
214
|
+
)
|
|
215
|
+
except EmptyTaskInfoError as ex:
|
|
216
|
+
# retry when server returns HTTP 204, which is designed for retry
|
|
217
|
+
if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
|
|
218
|
+
raise NoTaskServerResponseError(
|
|
219
|
+
f"No response for request {method_name}. "
|
|
220
|
+
f"Instance ID: {self._instance.id}. "
|
|
221
|
+
f"Request ID: {ex.request_id}"
|
|
222
|
+
) from None
|
|
223
|
+
time.sleep(0.5)
|
|
198
224
|
|
|
199
225
|
def get_session(self) -> SessionInfo:
|
|
200
226
|
req_data = {"output_format": self._output_format}
|
|
201
|
-
serialized = self._instance.put_task_info(
|
|
202
|
-
self._task_name, MAXFRAME_TASK_GET_SESSION_METHOD, json.dumps(req_data)
|
|
203
|
-
)
|
|
227
|
+
serialized = self._put_task_info(MAXFRAME_TASK_GET_SESSION_METHOD, req_data)
|
|
204
228
|
info: SessionInfo = self._deserial_task_info_result(serialized, SessionInfo)
|
|
205
229
|
info.session_id = self._instance.id
|
|
206
230
|
return info
|
|
@@ -217,13 +241,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
217
241
|
self,
|
|
218
242
|
dag: TileableGraph,
|
|
219
243
|
managed_input_infos: Optional[Dict[str, ResultInfo]] = None,
|
|
244
|
+
new_settings: Dict[str, Any] = None,
|
|
220
245
|
) -> DagInfo:
|
|
246
|
+
new_settings_value = {
|
|
247
|
+
"odps.maxframe.settings": json.dumps(new_settings),
|
|
248
|
+
}
|
|
221
249
|
req_data = {
|
|
222
250
|
"protocol": MAXFRAME_DEFAULT_PROTOCOL,
|
|
223
251
|
"dag": base64.b64encode(serialize_serializable(dag)).decode(),
|
|
224
252
|
"managed_input_infos": base64.b64encode(
|
|
225
253
|
serialize_serializable(managed_input_infos)
|
|
226
254
|
).decode(),
|
|
255
|
+
"new_settings": json.dumps(new_settings_value),
|
|
227
256
|
"output_format": self._output_format,
|
|
228
257
|
}
|
|
229
258
|
res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
|
|
@@ -276,7 +305,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
276
305
|
class MaxFrameTaskSession(MaxFrameSession):
|
|
277
306
|
schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
|
|
278
307
|
|
|
279
|
-
|
|
308
|
+
_caller: MaxFrameInstanceCaller
|
|
280
309
|
|
|
281
310
|
@classmethod
|
|
282
311
|
def _create_caller(
|
|
@@ -296,6 +325,15 @@ class MaxFrameTaskSession(MaxFrameSession):
|
|
|
296
325
|
**kwargs,
|
|
297
326
|
)
|
|
298
327
|
|
|
328
|
+
@property
|
|
329
|
+
def closed(self) -> bool:
|
|
330
|
+
if super().closed:
|
|
331
|
+
return True
|
|
332
|
+
if not self._caller or not self._caller.instance:
|
|
333
|
+
# session not initialized yet
|
|
334
|
+
return False
|
|
335
|
+
return self._caller.instance.is_terminated()
|
|
336
|
+
|
|
299
337
|
|
|
300
338
|
def register_session_schemes(overwrite: bool = False):
|
|
301
339
|
MaxFrameTaskSession.register_schemes(overwrite=overwrite)
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -137,6 +137,15 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
137
137
|
assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
138
138
|
|
|
139
139
|
|
|
140
|
+
def test_run_and_fetch_slice(start_mock_session):
|
|
141
|
+
pd_df = pd.DataFrame(np.random.rand(1000, 5), columns=list("ABCDE"))
|
|
142
|
+
df = md.DataFrame(pd_df)
|
|
143
|
+
result = df.execute()
|
|
144
|
+
|
|
145
|
+
sliced = result.head(10).fetch()
|
|
146
|
+
assert len(sliced) == 10
|
|
147
|
+
|
|
148
|
+
|
|
140
149
|
def test_run_empty_table(start_mock_session):
|
|
141
150
|
odps_entry = ODPS.from_environments()
|
|
142
151
|
|
|
@@ -189,7 +198,7 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
|
|
|
189
198
|
table_name = build_temp_table_name(start_mock_session, "tmp_save")
|
|
190
199
|
table_obj = odps_entry.get_table(table_name)
|
|
191
200
|
try:
|
|
192
|
-
md.to_odps_table(md.DataFrame(pd_df), table_obj).execute().fetch()
|
|
201
|
+
md.to_odps_table(md.DataFrame(pd_df), table_obj, lifecycle=1).execute().fetch()
|
|
193
202
|
with table_obj.open_reader() as reader:
|
|
194
203
|
result_df = reader.to_pandas()
|
|
195
204
|
assert len(result_df) == 10
|
|
@@ -256,7 +265,10 @@ def test_execute_with_tensor(oss_config, start_mock_session):
|
|
|
256
265
|
|
|
257
266
|
result = (df - [1, 2]).execute().fetch()
|
|
258
267
|
expected = pd_df - [1, 2]
|
|
259
|
-
|
|
268
|
+
# TODO: currently the record order in tensor reading from table is the index
|
|
269
|
+
# sorting order
|
|
270
|
+
expected.sort_index(axis=0, inplace=True)
|
|
271
|
+
pd.testing.assert_frame_equal(result, expected, check_like=True)
|
|
260
272
|
|
|
261
273
|
|
|
262
274
|
def test_run_remote_success(oss_config, start_mock_session):
|
|
File without changes
|
|
File without changes
|