maxframe 0.1.0b5__cp310-cp310-macosx_10_9_universal2.whl → 1.0.0rc1__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (47)
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/codegen.py +10 -2
  3. maxframe/config/config.py +4 -0
  4. maxframe/core/__init__.py +0 -3
  5. maxframe/core/entity/__init__.py +1 -8
  6. maxframe/core/entity/objects.py +3 -45
  7. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  8. maxframe/core/graph/core.pyx +4 -4
  9. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  10. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  11. maxframe/dataframe/datastore/to_odps.py +21 -0
  12. maxframe/dataframe/indexing/align.py +1 -1
  13. maxframe/dataframe/misc/apply.py +2 -0
  14. maxframe/dataframe/misc/memory_usage.py +2 -2
  15. maxframe/dataframe/misc/tests/test_misc.py +23 -0
  16. maxframe/dataframe/statistics/corr.py +3 -3
  17. maxframe/errors.py +13 -0
  18. maxframe/extension.py +12 -0
  19. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  20. maxframe/lib/mmh3.pyi +43 -0
  21. maxframe/lib/wrapped_pickle.py +2 -1
  22. maxframe/protocol.py +108 -10
  23. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  24. maxframe/serialization/core.pxd +3 -0
  25. maxframe/serialization/core.pyi +3 -0
  26. maxframe/serialization/core.pyx +54 -25
  27. maxframe/serialization/exception.py +1 -1
  28. maxframe/serialization/pandas.py +7 -2
  29. maxframe/serialization/serializables/core.py +119 -12
  30. maxframe/serialization/serializables/tests/test_serializable.py +46 -4
  31. maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
  32. maxframe/tensor/base/atleast_1d.py +1 -1
  33. maxframe/tensor/base/unique.py +1 -1
  34. maxframe/tensor/reduction/count_nonzero.py +1 -1
  35. maxframe/tests/test_protocol.py +34 -0
  36. maxframe/tests/test_utils.py +0 -12
  37. maxframe/tests/utils.py +2 -2
  38. maxframe/utils.py +16 -13
  39. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/METADATA +2 -2
  40. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/RECORD +46 -44
  41. maxframe_client/__init__.py +0 -1
  42. maxframe_client/session/odps.py +45 -5
  43. maxframe_client/session/task.py +41 -20
  44. maxframe_client/tests/test_session.py +36 -0
  45. maxframe_client/clients/spe.py +0 -104
  46. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/WHEEL +0 -0
  47. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -26,6 +26,7 @@ from odps.models import Instance, MaxFrameTask
26
26
 
27
27
  from maxframe.config import options
28
28
  from maxframe.core import TileableGraph
29
+ from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
29
30
  from maxframe.protocol import DagInfo, JsonSerializable, ResultInfo, SessionInfo
30
31
  from maxframe.utils import deserialize_serializable, serialize_serializable, to_str
31
32
 
@@ -82,6 +83,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
82
83
  self._running_cluster = running_cluster
83
84
  self._major_version = major_version
84
85
  self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
86
+ self._deleted = False
85
87
 
86
88
  if nested_instance_id is None:
87
89
  self._nested = False
@@ -94,10 +96,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
94
96
  self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
95
97
  ):
96
98
  if isinstance(content, (str, bytes)):
99
+ if len(content) == 0:
100
+ content = "{}"
97
101
  json_data = json.loads(to_str(content))
98
102
  else:
99
103
  json_data = content
100
- result_data = base64.b64decode(json_data["result"])
104
+ encoded_result = json_data.get("result")
105
+ if not encoded_result:
106
+ if self._deleted:
107
+ return None
108
+ else:
109
+ raise SessionAlreadyClosedError(self._instance.id)
110
+ result_data = base64.b64decode(encoded_result)
101
111
  if self._output_format == MAXFRAME_OUTPUT_MAXFRAME_FORMAT:
102
112
  return deserialize_serializable(result_data)
103
113
  elif self._output_format == MAXFRAME_OUTPUT_JSON_FORMAT:
@@ -178,6 +188,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
178
188
  time.sleep(interval)
179
189
  interval = min(max_interval, interval * 2)
180
190
 
191
+ def _put_task_info(self, method_name: str, json_data: dict):
192
+ resp_data = self._instance.put_task_info(
193
+ self._task_name, method_name, json.dumps(json_data)
194
+ )
195
+ if not resp_data:
196
+ raise NoTaskServerResponseError(f"No response for request {method_name}")
197
+ return resp_data
198
+
181
199
  def get_session(self) -> SessionInfo:
182
200
  req_data = {"output_format": self._output_format}
183
201
  serialized = self._instance.put_task_info(
@@ -192,11 +210,8 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
192
210
  self._instance.stop()
193
211
  else:
194
212
  req_data = {"output_format": self._output_format}
195
- self._instance.put_task_info(
196
- self._task_name,
197
- MAXFRAME_TASK_DELETE_SESSION_METHOD,
198
- json.dumps(req_data),
199
- )
213
+ self._put_task_info(MAXFRAME_TASK_DELETE_SESSION_METHOD, req_data)
214
+ self._deleted = True
200
215
 
201
216
  def submit_dag(
202
217
  self,
@@ -211,9 +226,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
211
226
  ).decode(),
212
227
  "output_format": self._output_format,
213
228
  }
214
- res = self._instance.put_task_info(
215
- self._task_name, MAXFRAME_TASK_SUBMIT_DAG_METHOD, json.dumps(req_data)
216
- )
229
+ res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
217
230
  return self._deserial_task_info_result(res, DagInfo)
218
231
 
219
232
  def get_dag_info(self, dag_id: str) -> DagInfo:
@@ -222,9 +235,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
222
235
  "dag_id": dag_id,
223
236
  "output_format": self._output_format,
224
237
  }
225
- res = self._instance.put_task_info(
226
- self._task_name, MAXFRAME_TASK_GET_DAG_INFO_METHOD, json.dumps(req_data)
227
- )
238
+ res = self._put_task_info(MAXFRAME_TASK_GET_DAG_INFO_METHOD, req_data)
228
239
  return self._deserial_task_info_result(res, DagInfo)
229
240
 
230
241
  def cancel_dag(self, dag_id: str) -> DagInfo:
@@ -233,23 +244,33 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
233
244
  "dag_id": dag_id,
234
245
  "output_format": self._output_format,
235
246
  }
236
- res = self._instance.put_task_info(
237
- self._task_name, MAXFRAME_TASK_CANCEL_DAG_METHOD, json.dumps(req_data)
238
- )
247
+ res = self._put_task_info(MAXFRAME_TASK_CANCEL_DAG_METHOD, req_data)
239
248
  return self._deserial_task_info_result(res, DagInfo)
240
249
 
241
250
  def decref(self, tileable_keys: List[str]) -> None:
242
251
  req_data = {
243
252
  "tileable_keys": ",".join(tileable_keys),
244
253
  }
245
- self._instance.put_task_info(
246
- self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
247
- )
254
+ self._put_task_info(MAXFRAME_TASK_DECREF_METHOD, req_data)
248
255
 
249
256
  def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
257
+ """
258
+ Generate logview address
259
+
260
+ Parameters
261
+ ----------
262
+ dag_id: id of dag for which dag logview detail page to access
263
+ hours: hours of the logview address auth limit
264
+ Returns
265
+ -------
266
+ Logview address
267
+ """
250
268
  hours = hours or options.session.logview_hours
251
- subquery_suffix = f"&subQuery={dag_id}" if dag_id else ""
252
- return self._instance.get_logview_address(hours) + subquery_suffix
269
+ # notice: maxframe can't reuse subQuery else will conflict with mcqa when fetch resource data,
270
+ # added dagId for maxframe so logview backend will return maxframe data format if
271
+ # instance and dagId is provided.
272
+ dag_suffix = f"&dagId={dag_id}" if dag_id else ""
273
+ return self._instance.get_logview_address(hours) + dag_suffix
253
274
 
254
275
 
255
276
  class MaxFrameTaskSession(MaxFrameSession):
@@ -23,7 +23,10 @@ from odps import ODPS
23
23
 
24
24
  import maxframe.dataframe as md
25
25
  import maxframe.remote as mr
26
+ from maxframe.config import options
27
+ from maxframe.config.config import option_context
26
28
  from maxframe.core import ExecutableTuple, TileableGraph
29
+ from maxframe.errors import NoTaskServerResponseError
27
30
  from maxframe.lib.aio import stop_isolation
28
31
  from maxframe.protocol import ResultInfo
29
32
  from maxframe.serialization import RemoteException
@@ -35,6 +38,7 @@ from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F4
35
38
  )
36
39
 
37
40
  from ..clients.framedriver import FrameDriverClient
41
+ from ..session.odps import MaxFrameRestCaller
38
42
 
39
43
  pytestmark = pytest.mark.maxframe_engine(["MCSQL", "SPE"])
40
44
 
@@ -86,11 +90,25 @@ def test_simple_run_dataframe(start_mock_session):
86
90
  assert len(dag) == 2
87
91
  return await original_submit_dag(self, session_id, dag, managed_input_infos)
88
92
 
93
+ no_task_server_raised = False
94
+ original_get_dag_info = MaxFrameRestCaller.get_dag_info
95
+
96
+ async def patched_get_dag_info(self, dag_id: str):
97
+ nonlocal no_task_server_raised
98
+
99
+ if not no_task_server_raised:
100
+ no_task_server_raised = True
101
+ raise NoTaskServerResponseError
102
+ return await original_get_dag_info(self, dag_id)
103
+
89
104
  df["H"] = "extra_content"
90
105
 
91
106
  with mock.patch(
92
107
  "maxframe_client.clients.framedriver.FrameDriverClient.submit_dag",
93
108
  new=patched_submit_dag,
109
+ ), mock.patch(
110
+ "maxframe_client.session.odps.MaxFrameRestCaller.get_dag_info",
111
+ new=patched_get_dag_info,
94
112
  ):
95
113
  result = df.execute().fetch()
96
114
  assert len(result) == 1000
@@ -184,6 +202,24 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
184
202
  odps_entry.delete_table(table_name, if_exists=True)
185
203
 
186
204
 
205
+ def test_create_session_with_options(framedriver_app): # noqa: F811
206
+ odps_entry = ODPS.from_environments()
207
+ framedriver_addr = f"mf://localhost:{framedriver_app.port}"
208
+ old_value = options.session.max_alive_seconds
209
+ session = None
210
+ try:
211
+ options.session.max_alive_seconds = 10
212
+ session = new_session(framedriver_addr, odps_entry=odps_entry)
213
+ session_id = session.session_id
214
+ session_conf = framedriver_app.session_manager.get_session_settings(session_id)
215
+ with option_context(session_conf) as session_options:
216
+ assert session_options.session.max_alive_seconds == 10
217
+ finally:
218
+ options.session.max_alive_seconds = old_value
219
+ if session is not None:
220
+ session.destroy()
221
+
222
+
187
223
  def test_run_and_fetch_series(start_mock_session):
188
224
  odps_entry = ODPS.from_environments()
189
225
 
@@ -1,104 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from typing import Any, Dict, Optional
16
-
17
- from tornado import httpclient
18
-
19
- from maxframe.core import TileableGraph
20
- from maxframe.protocol import ExecuteSubDagRequest, ProtocolBody, SubDagInfo
21
- from maxframe.typing_ import TimeoutType
22
- from maxframe.utils import (
23
- deserialize_serializable,
24
- format_timeout_params,
25
- serialize_serializable,
26
- wait_http_response,
27
- )
28
-
29
-
30
- class SPEClient:
31
- def __init__(
32
- self,
33
- endpoint: str,
34
- session_id: Optional[str] = None,
35
- host: str = None,
36
- ):
37
- self._endpoint = endpoint.rstrip("/")
38
- self._session_id = session_id
39
- self._headers = {"Host": host}
40
-
41
- @staticmethod
42
- def _load_subdag_info(resp: httpclient.HTTPResponse) -> SubDagInfo:
43
- res: ProtocolBody[SubDagInfo] = deserialize_serializable(resp.body)
44
- return res.body
45
-
46
- async def submit_subdag(
47
- self, subdag: TileableGraph, settings: Dict[str, Any] = None
48
- ) -> SubDagInfo:
49
- req_url = f"{self._endpoint}/api/subdags"
50
- req_body: ProtocolBody[ExecuteSubDagRequest] = ProtocolBody(
51
- body=ExecuteSubDagRequest(dag=subdag, settings=settings),
52
- )
53
-
54
- if self._session_id is not None:
55
- req_url += f"?session_id={self._session_id}"
56
-
57
- resp = await httpclient.AsyncHTTPClient().fetch(
58
- req_url,
59
- method="POST",
60
- headers=self._headers,
61
- body=serialize_serializable(req_body),
62
- )
63
- return self._load_subdag_info(resp)
64
-
65
- async def get_subdag_info(self, subdag_id: str) -> SubDagInfo:
66
- req_url = f"{self._endpoint}/api/subdags/{subdag_id}?wait=0"
67
- resp = await httpclient.AsyncHTTPClient().fetch(
68
- req_url,
69
- method="GET",
70
- headers=self._headers,
71
- )
72
- return self._load_subdag_info(resp)
73
-
74
- async def wait_subdag(
75
- self, subdag_id: str, wait_timeout: TimeoutType = None
76
- ) -> SubDagInfo:
77
- req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
78
- params = format_timeout_params(wait_timeout)
79
- try:
80
- resp = await wait_http_response(
81
- req_url + params,
82
- method="GET",
83
- headers=self._headers,
84
- request_timeout=wait_timeout,
85
- )
86
- return self._load_subdag_info(resp)
87
- except TimeoutError:
88
- return await self.get_subdag_info(subdag_id)
89
-
90
- async def cancel_subdag(
91
- self, subdag_id: str, wait_timeout: TimeoutType = None
92
- ) -> SubDagInfo:
93
- req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
94
- params = format_timeout_params(wait_timeout)
95
- try:
96
- resp = await wait_http_response(
97
- req_url + params,
98
- method="DELETE",
99
- headers=self._headers,
100
- request_timeout=wait_timeout,
101
- )
102
- return self._load_subdag_info(resp)
103
- except TimeoutError:
104
- return await self.get_subdag_info(subdag_id)