maxframe 0.1.0b5__cp39-cp39-win_amd64.whl → 1.0.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp39-win_amd64.pyd +0 -0
- maxframe/codegen.py +10 -2
- maxframe/config/config.py +4 -0
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/misc/apply.py +2 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/tests/test_misc.py +23 -0
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/protocol.py +108 -10
- maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +119 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/atleast_1d.py +1 -1
- maxframe/tensor/base/unique.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +2 -2
- maxframe/utils.py +16 -13
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/METADATA +2 -2
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/RECORD +46 -44
- maxframe_client/__init__.py +0 -1
- maxframe_client/session/odps.py +45 -5
- maxframe_client/session/task.py +41 -20
- maxframe_client/tests/test_session.py +36 -0
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/top_level.txt +0 -0
maxframe_client/session/task.py
CHANGED
|
@@ -26,6 +26,7 @@ from odps.models import Instance, MaxFrameTask
|
|
|
26
26
|
|
|
27
27
|
from maxframe.config import options
|
|
28
28
|
from maxframe.core import TileableGraph
|
|
29
|
+
from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
|
|
29
30
|
from maxframe.protocol import DagInfo, JsonSerializable, ResultInfo, SessionInfo
|
|
30
31
|
from maxframe.utils import deserialize_serializable, serialize_serializable, to_str
|
|
31
32
|
|
|
@@ -82,6 +83,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
82
83
|
self._running_cluster = running_cluster
|
|
83
84
|
self._major_version = major_version
|
|
84
85
|
self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
|
|
86
|
+
self._deleted = False
|
|
85
87
|
|
|
86
88
|
if nested_instance_id is None:
|
|
87
89
|
self._nested = False
|
|
@@ -94,10 +96,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
94
96
|
self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
|
|
95
97
|
):
|
|
96
98
|
if isinstance(content, (str, bytes)):
|
|
99
|
+
if len(content) == 0:
|
|
100
|
+
content = "{}"
|
|
97
101
|
json_data = json.loads(to_str(content))
|
|
98
102
|
else:
|
|
99
103
|
json_data = content
|
|
100
|
-
|
|
104
|
+
encoded_result = json_data.get("result")
|
|
105
|
+
if not encoded_result:
|
|
106
|
+
if self._deleted:
|
|
107
|
+
return None
|
|
108
|
+
else:
|
|
109
|
+
raise SessionAlreadyClosedError(self._instance.id)
|
|
110
|
+
result_data = base64.b64decode(encoded_result)
|
|
101
111
|
if self._output_format == MAXFRAME_OUTPUT_MAXFRAME_FORMAT:
|
|
102
112
|
return deserialize_serializable(result_data)
|
|
103
113
|
elif self._output_format == MAXFRAME_OUTPUT_JSON_FORMAT:
|
|
@@ -178,6 +188,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
178
188
|
time.sleep(interval)
|
|
179
189
|
interval = min(max_interval, interval * 2)
|
|
180
190
|
|
|
191
|
+
def _put_task_info(self, method_name: str, json_data: dict):
|
|
192
|
+
resp_data = self._instance.put_task_info(
|
|
193
|
+
self._task_name, method_name, json.dumps(json_data)
|
|
194
|
+
)
|
|
195
|
+
if not resp_data:
|
|
196
|
+
raise NoTaskServerResponseError(f"No response for request {method_name}")
|
|
197
|
+
return resp_data
|
|
198
|
+
|
|
181
199
|
def get_session(self) -> SessionInfo:
|
|
182
200
|
req_data = {"output_format": self._output_format}
|
|
183
201
|
serialized = self._instance.put_task_info(
|
|
@@ -192,11 +210,8 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
192
210
|
self._instance.stop()
|
|
193
211
|
else:
|
|
194
212
|
req_data = {"output_format": self._output_format}
|
|
195
|
-
self._instance.put_task_info(
|
|
196
|
-
self._task_name,
|
197
|
-
MAXFRAME_TASK_DELETE_SESSION_METHOD,
|
|
198
|
-
json.dumps(req_data),
|
|
199
|
-
)
|
|
213
|
+
self._put_task_info(MAXFRAME_TASK_DELETE_SESSION_METHOD, req_data)
|
|
214
|
+
self._deleted = True
|
|
200
215
|
|
|
201
216
|
def submit_dag(
|
|
202
217
|
self,
|
|
@@ -211,9 +226,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
211
226
|
).decode(),
|
|
212
227
|
"output_format": self._output_format,
|
|
213
228
|
}
|
|
214
|
-
res = self._instance.put_task_info(
|
|
215
|
-
self._task_name, MAXFRAME_TASK_SUBMIT_DAG_METHOD, json.dumps(req_data)
|
|
216
|
-
)
|
|
229
|
+
res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
|
|
217
230
|
return self._deserial_task_info_result(res, DagInfo)
|
|
218
231
|
|
|
219
232
|
def get_dag_info(self, dag_id: str) -> DagInfo:
|
|
@@ -222,9 +235,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
222
235
|
"dag_id": dag_id,
|
|
223
236
|
"output_format": self._output_format,
|
|
224
237
|
}
|
|
225
|
-
res = self._instance.put_task_info(
|
|
226
|
-
self._task_name, MAXFRAME_TASK_GET_DAG_INFO_METHOD, json.dumps(req_data)
|
|
227
|
-
)
|
|
238
|
+
res = self._put_task_info(MAXFRAME_TASK_GET_DAG_INFO_METHOD, req_data)
|
|
228
239
|
return self._deserial_task_info_result(res, DagInfo)
|
|
229
240
|
|
|
230
241
|
def cancel_dag(self, dag_id: str) -> DagInfo:
|
|
@@ -233,23 +244,33 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
233
244
|
"dag_id": dag_id,
|
|
234
245
|
"output_format": self._output_format,
|
|
235
246
|
}
|
|
236
|
-
res = self._instance.put_task_info(
|
|
237
|
-
self._task_name, MAXFRAME_TASK_CANCEL_DAG_METHOD, json.dumps(req_data)
|
|
238
|
-
)
|
|
247
|
+
res = self._put_task_info(MAXFRAME_TASK_CANCEL_DAG_METHOD, req_data)
|
|
239
248
|
return self._deserial_task_info_result(res, DagInfo)
|
|
240
249
|
|
|
241
250
|
def decref(self, tileable_keys: List[str]) -> None:
|
|
242
251
|
req_data = {
|
|
243
252
|
"tileable_keys": ",".join(tileable_keys),
|
|
244
253
|
}
|
|
245
|
-
self._instance.put_task_info(
|
|
246
|
-
self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
|
|
247
|
-
)
|
|
254
|
+
self._put_task_info(MAXFRAME_TASK_DECREF_METHOD, req_data)
|
|
248
255
|
|
|
249
256
|
def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
|
|
257
|
+
"""
|
|
258
|
+
Generate logview address
|
|
259
|
+
|
|
260
|
+
Parameters
|
|
261
|
+
----------
|
|
262
|
+
dag_id: id of dag for which dag logview detail page to access
|
|
263
|
+
hours: hours of the logview address auth limit
|
|
264
|
+
Returns
|
|
265
|
+
-------
|
|
266
|
+
Logview address
|
|
267
|
+
"""
|
|
250
268
|
hours = hours or options.session.logview_hours
|
|
251
|
-
|
|
252
|
-
return
|
|
269
|
+
# notice: maxframe can't reuse subQuery else will conflict with mcqa when fetch resource data,
|
|
270
|
+
# added dagId for maxframe so logview backend will return maxframe data format if
|
|
271
|
+
# instance and dagId is provided.
|
|
272
|
+
dag_suffix = f"&dagId={dag_id}" if dag_id else ""
|
|
273
|
+
return self._instance.get_logview_address(hours) + dag_suffix
|
|
253
274
|
|
|
254
275
|
|
|
255
276
|
class MaxFrameTaskSession(MaxFrameSession):
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -23,7 +23,10 @@ from odps import ODPS
|
|
|
23
23
|
|
|
24
24
|
import maxframe.dataframe as md
|
|
25
25
|
import maxframe.remote as mr
|
|
26
|
+
from maxframe.config import options
|
|
27
|
+
from maxframe.config.config import option_context
|
|
26
28
|
from maxframe.core import ExecutableTuple, TileableGraph
|
|
29
|
+
from maxframe.errors import NoTaskServerResponseError
|
|
27
30
|
from maxframe.lib.aio import stop_isolation
|
|
28
31
|
from maxframe.protocol import ResultInfo
|
|
29
32
|
from maxframe.serialization import RemoteException
|
|
@@ -35,6 +38,7 @@ from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F4
|
|
|
35
38
|
)
|
|
36
39
|
|
|
37
40
|
from ..clients.framedriver import FrameDriverClient
|
|
41
|
+
from ..session.odps import MaxFrameRestCaller
|
|
38
42
|
|
|
39
43
|
pytestmark = pytest.mark.maxframe_engine(["MCSQL", "SPE"])
|
|
40
44
|
|
|
@@ -86,11 +90,25 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
86
90
|
assert len(dag) == 2
|
|
87
91
|
return await original_submit_dag(self, session_id, dag, managed_input_infos)
|
|
88
92
|
|
|
93
|
+
no_task_server_raised = False
|
|
94
|
+
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
95
|
+
|
|
96
|
+
async def patched_get_dag_info(self, dag_id: str):
|
|
97
|
+
nonlocal no_task_server_raised
|
|
98
|
+
|
|
99
|
+
if not no_task_server_raised:
|
|
100
|
+
no_task_server_raised = True
|
|
101
|
+
raise NoTaskServerResponseError
|
|
102
|
+
return await original_get_dag_info(self, dag_id)
|
|
103
|
+
|
|
89
104
|
df["H"] = "extra_content"
|
|
90
105
|
|
|
91
106
|
with mock.patch(
|
|
92
107
|
"maxframe_client.clients.framedriver.FrameDriverClient.submit_dag",
|
|
93
108
|
new=patched_submit_dag,
|
|
109
|
+
), mock.patch(
|
|
110
|
+
"maxframe_client.session.odps.MaxFrameRestCaller.get_dag_info",
|
|
111
|
+
new=patched_get_dag_info,
|
|
94
112
|
):
|
|
95
113
|
result = df.execute().fetch()
|
|
96
114
|
assert len(result) == 1000
|
|
@@ -184,6 +202,24 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
|
|
|
184
202
|
odps_entry.delete_table(table_name, if_exists=True)
|
|
185
203
|
|
|
186
204
|
|
|
205
|
+
def test_create_session_with_options(framedriver_app): # noqa: F811
|
|
206
|
+
odps_entry = ODPS.from_environments()
|
|
207
|
+
framedriver_addr = f"mf://localhost:{framedriver_app.port}"
|
|
208
|
+
old_value = options.session.max_alive_seconds
|
|
209
|
+
session = None
|
|
210
|
+
try:
|
|
211
|
+
options.session.max_alive_seconds = 10
|
|
212
|
+
session = new_session(framedriver_addr, odps_entry=odps_entry)
|
|
213
|
+
session_id = session.session_id
|
|
214
|
+
session_conf = framedriver_app.session_manager.get_session_settings(session_id)
|
|
215
|
+
with option_context(session_conf) as session_options:
|
|
216
|
+
assert session_options.session.max_alive_seconds == 10
|
|
217
|
+
finally:
|
|
218
|
+
options.session.max_alive_seconds = old_value
|
|
219
|
+
if session is not None:
|
|
220
|
+
session.destroy()
|
|
221
|
+
|
|
222
|
+
|
|
187
223
|
def test_run_and_fetch_series(start_mock_session):
|
|
188
224
|
odps_entry = ODPS.from_environments()
|
|
189
225
|
|
maxframe_client/clients/spe.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from typing import Any, Dict, Optional
|
|
16
|
-
|
|
17
|
-
from tornado import httpclient
|
|
18
|
-
|
|
19
|
-
from maxframe.core import TileableGraph
|
|
20
|
-
from maxframe.protocol import ExecuteSubDagRequest, ProtocolBody, SubDagInfo
|
|
21
|
-
from maxframe.typing_ import TimeoutType
|
|
22
|
-
from maxframe.utils import (
|
|
23
|
-
deserialize_serializable,
|
|
24
|
-
format_timeout_params,
|
|
25
|
-
serialize_serializable,
|
|
26
|
-
wait_http_response,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class SPEClient:
|
|
31
|
-
def __init__(
|
|
32
|
-
self,
|
|
33
|
-
endpoint: str,
|
|
34
|
-
session_id: Optional[str] = None,
|
|
35
|
-
host: str = None,
|
|
36
|
-
):
|
|
37
|
-
self._endpoint = endpoint.rstrip("/")
|
|
38
|
-
self._session_id = session_id
|
|
39
|
-
self._headers = {"Host": host}
|
|
40
|
-
|
|
41
|
-
@staticmethod
|
|
42
|
-
def _load_subdag_info(resp: httpclient.HTTPResponse) -> SubDagInfo:
|
|
43
|
-
res: ProtocolBody[SubDagInfo] = deserialize_serializable(resp.body)
|
|
44
|
-
return res.body
|
|
45
|
-
|
|
46
|
-
async def submit_subdag(
|
|
47
|
-
self, subdag: TileableGraph, settings: Dict[str, Any] = None
|
|
48
|
-
) -> SubDagInfo:
|
|
49
|
-
req_url = f"{self._endpoint}/api/subdags"
|
|
50
|
-
req_body: ProtocolBody[ExecuteSubDagRequest] = ProtocolBody(
|
|
51
|
-
body=ExecuteSubDagRequest(dag=subdag, settings=settings),
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
if self._session_id is not None:
|
|
55
|
-
req_url += f"?session_id={self._session_id}"
|
|
56
|
-
|
|
57
|
-
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
58
|
-
req_url,
|
|
59
|
-
method="POST",
|
|
60
|
-
headers=self._headers,
|
|
61
|
-
body=serialize_serializable(req_body),
|
|
62
|
-
)
|
|
63
|
-
return self._load_subdag_info(resp)
|
|
64
|
-
|
|
65
|
-
async def get_subdag_info(self, subdag_id: str) -> SubDagInfo:
|
|
66
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}?wait=0"
|
|
67
|
-
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
68
|
-
req_url,
|
|
69
|
-
method="GET",
|
|
70
|
-
headers=self._headers,
|
|
71
|
-
)
|
|
72
|
-
return self._load_subdag_info(resp)
|
|
73
|
-
|
|
74
|
-
async def wait_subdag(
|
|
75
|
-
self, subdag_id: str, wait_timeout: TimeoutType = None
|
|
76
|
-
) -> SubDagInfo:
|
|
77
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
|
|
78
|
-
params = format_timeout_params(wait_timeout)
|
|
79
|
-
try:
|
|
80
|
-
resp = await wait_http_response(
|
|
81
|
-
req_url + params,
|
|
82
|
-
method="GET",
|
|
83
|
-
headers=self._headers,
|
|
84
|
-
request_timeout=wait_timeout,
|
|
85
|
-
)
|
|
86
|
-
return self._load_subdag_info(resp)
|
|
87
|
-
except TimeoutError:
|
|
88
|
-
return await self.get_subdag_info(subdag_id)
|
|
89
|
-
|
|
90
|
-
async def cancel_subdag(
|
|
91
|
-
self, subdag_id: str, wait_timeout: TimeoutType = None
|
|
92
|
-
) -> SubDagInfo:
|
|
93
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
|
|
94
|
-
params = format_timeout_params(wait_timeout)
|
|
95
|
-
try:
|
|
96
|
-
resp = await wait_http_response(
|
|
97
|
-
req_url + params,
|
|
98
|
-
method="DELETE",
|
|
99
|
-
headers=self._headers,
|
|
100
|
-
request_timeout=wait_timeout,
|
|
101
|
-
)
|
|
102
|
-
return self._load_subdag_info(resp)
|
|
103
|
-
except TimeoutError:
|
|
104
|
-
return await self.get_subdag_info(subdag_id)
|
|
File without changes
|
|
File without changes
|