maxframe 0.1.0b2__cp311-cp311-win32.whl → 0.1.0b4__cp311-cp311-win32.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (42)
  1. maxframe/_utils.cp311-win32.pyd +0 -0
  2. maxframe/codegen.py +88 -19
  3. maxframe/config/config.py +9 -0
  4. maxframe/core/entity/executable.py +1 -0
  5. maxframe/core/entity/objects.py +3 -2
  6. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  7. maxframe/dataframe/__init__.py +7 -1
  8. maxframe/dataframe/core.py +4 -2
  9. maxframe/dataframe/datasource/read_odps_query.py +4 -2
  10. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  11. maxframe/dataframe/datasource/tests/test_datasource.py +22 -0
  12. maxframe/dataframe/datastore/core.py +19 -0
  13. maxframe/dataframe/datastore/to_csv.py +2 -2
  14. maxframe/dataframe/datastore/to_odps.py +2 -2
  15. maxframe/dataframe/groupby/__init__.py +1 -0
  16. maxframe/dataframe/groupby/core.py +5 -0
  17. maxframe/dataframe/indexing/reset_index.py +1 -17
  18. maxframe/lib/aio/isolation.py +6 -1
  19. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  20. maxframe/odpsio/arrow.py +8 -3
  21. maxframe/odpsio/schema.py +18 -5
  22. maxframe/odpsio/tests/test_schema.py +25 -0
  23. maxframe/opcodes.py +5 -0
  24. maxframe/protocol.py +7 -0
  25. maxframe/serialization/core.cp311-win32.pyd +0 -0
  26. maxframe/serialization/serializables/core.py +6 -1
  27. maxframe/serialization/serializables/field.py +2 -0
  28. maxframe/session.py +4 -2
  29. maxframe/tensor/core.py +3 -3
  30. maxframe/tests/test_codegen.py +69 -0
  31. maxframe/tests/test_protocol.py +16 -8
  32. maxframe/tests/utils.py +1 -0
  33. maxframe/utils.py +20 -1
  34. {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/METADATA +1 -1
  35. {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/RECORD +42 -40
  36. maxframe_client/clients/framedriver.py +7 -7
  37. maxframe_client/session/odps.py +11 -10
  38. maxframe_client/session/task.py +8 -1
  39. maxframe_client/session/tests/test_task.py +29 -11
  40. maxframe_client/tests/test_session.py +23 -0
  41. {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/WHEEL +0 -0
  42. {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@
14
14
 
15
15
  from typing import Any, Dict, List
16
16
 
17
+ import msgpack
17
18
  from tornado import httpclient
18
19
 
19
20
  from maxframe.core import TileableGraph
@@ -28,7 +29,6 @@ from maxframe.protocol import (
28
29
  )
29
30
  from maxframe.typing_ import TimeoutType
30
31
  from maxframe.utils import (
31
- deserialize_serializable,
32
32
  format_timeout_params,
33
33
  serialize_serializable,
34
34
  wait_http_response,
@@ -47,12 +47,12 @@ class FrameDriverClient:
47
47
  resp = await httpclient.AsyncHTTPClient().fetch(
48
48
  req_url, method="POST", body=serialize_serializable(req_body)
49
49
  )
50
- return deserialize_serializable(resp.body).body
50
+ return SessionInfo.from_json(msgpack.loads(resp.body))
51
51
 
52
52
  async def get_session(self, session_id: str) -> SessionInfo:
53
53
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
54
54
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
55
- return deserialize_serializable(resp.body).body
55
+ return SessionInfo.from_json(msgpack.loads(resp.body))
56
56
 
57
57
  async def delete_session(self, session_id: str):
58
58
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
@@ -71,12 +71,12 @@ class FrameDriverClient:
71
71
  method="POST",
72
72
  body=serialize_serializable(ProtocolBody(body=req_body)),
73
73
  )
74
- return deserialize_serializable(resp.body).body
74
+ return DagInfo.from_json(msgpack.loads(resp.body))
75
75
 
76
76
  async def get_dag_info(self, session_id: str, dag_id: str) -> DagInfo:
77
77
  req_url = f"{self._endpoint}/api/sessions/{session_id}/dags/{dag_id}"
78
78
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
79
- return deserialize_serializable(resp.body).body
79
+ return DagInfo.from_json(msgpack.loads(resp.body))
80
80
 
81
81
  async def wait_dag(self, session_id: str, dag_id: str, timeout: TimeoutType = None):
82
82
  query_part = format_timeout_params(timeout)
@@ -87,7 +87,7 @@ class FrameDriverClient:
87
87
  resp = await wait_http_response(
88
88
  req_url, method="GET", request_timeout=timeout
89
89
  )
90
- info = deserialize_serializable(resp.body).body
90
+ info = DagInfo.from_json(msgpack.loads(resp.body))
91
91
  except TimeoutError:
92
92
  info = await self.get_dag_info(session_id, dag_id)
93
93
  return info
@@ -103,7 +103,7 @@ class FrameDriverClient:
103
103
  resp = await wait_http_response(
104
104
  req_url, method="DELETE", request_timeout=timeout
105
105
  )
106
- info = deserialize_serializable(resp.body).body
106
+ info = DagInfo.from_json(msgpack.loads(resp.body))
107
107
  except TimeoutError:
108
108
  info = await self.get_dag_info(session_id, dag_id)
109
109
  return info
@@ -115,7 +115,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
115
115
  ):
116
116
  super().__init__(address, session_id)
117
117
  self.timeout = timeout
118
- self._odps_entry = odps_entry or ODPS.from_environments()
118
+ self._odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
119
119
  self._tileable_to_infos = weakref.WeakKeyDictionary()
120
120
 
121
121
  self._caller = self._create_caller(odps_entry, address, **kwargs)
@@ -147,15 +147,16 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
147
147
  data = t.op.get_data()
148
148
  batch_size = options.session.upload_batch_size
149
149
 
150
- halo_client = HaloTableIO(self._odps_entry)
151
- with halo_client.open_writer(table_obj.full_table_name) as writer:
152
- for batch_start in range(0, len(data), batch_size):
153
- if isinstance(data, pd.Index):
154
- batch = data[batch_start : batch_start + batch_size]
155
- else:
156
- batch = data.iloc[batch_start : batch_start + batch_size]
157
- arrow_batch, _ = pandas_to_arrow(batch)
158
- writer.write(arrow_batch)
150
+ if len(data):
151
+ halo_client = HaloTableIO(self._odps_entry)
152
+ with halo_client.open_writer(table_obj.full_table_name) as writer:
153
+ for batch_start in range(0, len(data), batch_size):
154
+ if isinstance(data, pd.Index):
155
+ batch = data[batch_start : batch_start + batch_size]
156
+ else:
157
+ batch = data.iloc[batch_start : batch_start + batch_size]
158
+ arrow_batch, _ = pandas_to_arrow(batch)
159
+ writer.write(arrow_batch)
159
160
 
160
161
  read_tileable = read_odps_table(
161
162
  table_obj.full_table_name,
@@ -112,13 +112,19 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
112
112
  odps_entry: ODPS,
113
113
  task_name: Optional[str] = None,
114
114
  project: Optional[str] = None,
115
- priority: Optional[str] = None,
115
+ priority: Optional[int] = None,
116
116
  running_cluster: Optional[str] = None,
117
117
  nested_instance_id: Optional[str] = None,
118
118
  major_version: Optional[str] = None,
119
119
  output_format: Optional[str] = None,
120
120
  **kwargs,
121
121
  ):
122
+ if callable(odps_options.get_priority):
123
+ default_priority = odps_options.get_priority(odps_entry)
124
+ else:
125
+ default_priority = odps_options.priority
126
+ priority = priority if priority is not None else default_priority
127
+
122
128
  self._odps_entry = odps_entry
123
129
  self._task_name = task_name
124
130
  self._project = project
@@ -126,6 +132,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
126
132
  self._running_cluster = running_cluster
127
133
  self._major_version = major_version
128
134
  self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
135
+
129
136
  if nested_instance_id is None:
130
137
  self._nested = False
131
138
  self._instance = None
@@ -18,6 +18,7 @@ import os
18
18
  import mock
19
19
  from defusedxml import ElementTree
20
20
  from odps import ODPS
21
+ from odps import options as odps_options
21
22
 
22
23
  from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
23
24
  from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
@@ -27,17 +28,20 @@ expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
27
28
 
28
29
  def test_maxframe_instance_caller_creating_session():
29
30
  o = ODPS.from_environments()
30
- task_caller = MaxFrameInstanceCaller(
31
- odps_entry=o,
32
- task_name="task_test",
33
- major_version="test_version",
34
- output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
35
- priority="100",
36
- running_cluster="test_cluster",
37
- )
31
+
32
+ def create_caller(**kwargs):
33
+ kw = dict(
34
+ odps_entry=o,
35
+ task_name="task_test",
36
+ major_version="test_version",
37
+ output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
38
+ running_cluster="test_cluster",
39
+ )
40
+ kw.update(**kwargs)
41
+ return MaxFrameInstanceCaller(**kw)
38
42
 
39
43
  def mock_create(self, task: MaxFrameTask, priority=None, running_cluster=None):
40
- assert priority == "100"
44
+ assert priority == 100
41
45
  assert running_cluster == "test_cluster"
42
46
  root = ElementTree.parse(
43
47
  os.path.join(expected_file_dir, "create_session.xml")
@@ -62,6 +66,20 @@ def test_maxframe_instance_caller_creating_session():
62
66
  target="maxframe_client.session.task.MaxFrameInstanceCaller",
63
67
  _wait_instance_task_ready=mock.DEFAULT,
64
68
  get_session=mock.DEFAULT,
65
- ):
66
- with mock.patch("odps.models.instances.BaseInstances.create", mock_create):
69
+ ), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
70
+ task_caller = create_caller(priority=100)
71
+ task_caller.create_session()
72
+
73
+ old_priority = odps_options.priority
74
+ old_get_priority = odps_options.get_priority
75
+ try:
76
+ task_caller = create_caller(priority=100)
77
+ odps_options.priority = 100
78
+ task_caller.create_session()
79
+
80
+ odps_options.priority = None
81
+ odps_options.get_priority = lambda _: 100
67
82
  task_caller.create_session()
83
+ finally:
84
+ odps_options.priority = old_priority
85
+ odps_options.get_priority = old_get_priority
@@ -24,9 +24,11 @@ from odps import ODPS
24
24
  import maxframe.dataframe as md
25
25
  import maxframe.remote as mr
26
26
  from maxframe.core import ExecutableTuple, TileableGraph
27
+ from maxframe.lib.aio import stop_isolation
27
28
  from maxframe.protocol import ResultInfo
28
29
  from maxframe.serialization import RemoteException
29
30
  from maxframe.session import new_session
31
+ from maxframe.tests.utils import tn
30
32
  from maxframe.utils import build_temp_table_name
31
33
  from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
32
34
  framedriver_app,
@@ -52,6 +54,7 @@ def start_mock_session(framedriver_app): # noqa: F811
52
54
  time.sleep(5) # Wait for temp table deleted
53
55
  else:
54
56
  session.reset_default()
57
+ stop_isolation()
55
58
 
56
59
 
57
60
  def test_simple_run_dataframe(start_mock_session):
@@ -113,6 +116,26 @@ def test_simple_run_dataframe(start_mock_session):
113
116
  assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
114
117
 
115
118
 
119
+ def test_run_empty_table(start_mock_session):
120
+ odps_entry = ODPS.from_environments()
121
+
122
+ table_name = tn("test_session_empty_table")
123
+ odps_entry.delete_table(table_name, if_exists=True)
124
+ empty_table = odps_entry.create_table(
125
+ table_name, "_idx_0 bigint, a double, b double", lifecycle=1
126
+ )
127
+ df = md.read_odps_table(table_name, index_col="_idx_0")
128
+ df["d"] = df["a"] + df["b"]
129
+
130
+ executed = df.execute()
131
+ assert "Index: []" in str(executed)
132
+
133
+ fetched = executed.fetch()
134
+ assert 0 == len(fetched)
135
+
136
+ empty_table.drop()
137
+
138
+
116
139
  def test_run_dataframe_with_pd_source(start_mock_session):
117
140
  odps_entry = ODPS.from_environments()
118
141