maxframe 0.1.0b2__cp310-cp310-macosx_10_9_universal2.whl → 0.1.0b4__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/codegen.py +88 -19
- maxframe/config/config.py +9 -0
- maxframe/core/entity/executable.py +1 -0
- maxframe/core/entity/objects.py +3 -2
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/dataframe/__init__.py +7 -1
- maxframe/dataframe/core.py +4 -2
- maxframe/dataframe/datasource/read_odps_query.py +4 -2
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +22 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +2 -2
- maxframe/dataframe/datastore/to_odps.py +2 -2
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/core.py +5 -0
- maxframe/dataframe/indexing/reset_index.py +1 -17
- maxframe/lib/aio/isolation.py +6 -1
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/odpsio/arrow.py +8 -3
- maxframe/odpsio/schema.py +18 -5
- maxframe/odpsio/tests/test_schema.py +25 -0
- maxframe/opcodes.py +5 -0
- maxframe/protocol.py +7 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/serializables/core.py +6 -1
- maxframe/serialization/serializables/field.py +2 -0
- maxframe/session.py +4 -2
- maxframe/tensor/core.py +3 -3
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +16 -8
- maxframe/tests/utils.py +1 -0
- maxframe/utils.py +20 -1
- {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/METADATA +1 -1
- {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/RECORD +42 -40
- maxframe_client/clients/framedriver.py +7 -7
- maxframe_client/session/odps.py +11 -10
- maxframe_client/session/task.py +8 -1
- maxframe_client/session/tests/test_task.py +29 -11
- maxframe_client/tests/test_session.py +23 -0
- {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b2.dist-info → maxframe-0.1.0b4.dist-info}/top_level.txt +0 -0
|
maxframe_client/clients/framedriver.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Dict, List
|
|
16
16
|
|
|
17
|
+
import msgpack
|
|
17
18
|
from tornado import httpclient
|
|
18
19
|
|
|
19
20
|
from maxframe.core import TileableGraph
|
|
@@ -28,7 +29,6 @@ from maxframe.protocol import (
|
|
|
28
29
|
)
|
|
29
30
|
from maxframe.typing_ import TimeoutType
|
|
30
31
|
from maxframe.utils import (
|
|
31
|
-
deserialize_serializable,
|
|
32
32
|
format_timeout_params,
|
|
33
33
|
serialize_serializable,
|
|
34
34
|
wait_http_response,
|
|
@@ -47,12 +47,12 @@ class FrameDriverClient:
|
|
|
47
47
|
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
48
48
|
req_url, method="POST", body=serialize_serializable(req_body)
|
|
49
49
|
)
|
|
50
|
-
return
|
|
50
|
+
return SessionInfo.from_json(msgpack.loads(resp.body))
|
|
51
51
|
|
|
52
52
|
async def get_session(self, session_id: str) -> SessionInfo:
|
|
53
53
|
req_url = f"{self._endpoint}/api/sessions/{session_id}"
|
|
54
54
|
resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
|
|
55
|
-
return
|
|
55
|
+
return SessionInfo.from_json(msgpack.loads(resp.body))
|
|
56
56
|
|
|
57
57
|
async def delete_session(self, session_id: str):
|
|
58
58
|
req_url = f"{self._endpoint}/api/sessions/{session_id}"
|
|
@@ -71,12 +71,12 @@ class FrameDriverClient:
|
|
|
71
71
|
method="POST",
|
|
72
72
|
body=serialize_serializable(ProtocolBody(body=req_body)),
|
|
73
73
|
)
|
|
74
|
-
return
|
|
74
|
+
return DagInfo.from_json(msgpack.loads(resp.body))
|
|
75
75
|
|
|
76
76
|
async def get_dag_info(self, session_id: str, dag_id: str) -> DagInfo:
|
|
77
77
|
req_url = f"{self._endpoint}/api/sessions/{session_id}/dags/{dag_id}"
|
|
78
78
|
resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
|
|
79
|
-
return
|
|
79
|
+
return DagInfo.from_json(msgpack.loads(resp.body))
|
|
80
80
|
|
|
81
81
|
async def wait_dag(self, session_id: str, dag_id: str, timeout: TimeoutType = None):
|
|
82
82
|
query_part = format_timeout_params(timeout)
|
|
@@ -87,7 +87,7 @@ class FrameDriverClient:
|
|
|
87
87
|
resp = await wait_http_response(
|
|
88
88
|
req_url, method="GET", request_timeout=timeout
|
|
89
89
|
)
|
|
90
|
-
info =
|
|
90
|
+
info = DagInfo.from_json(msgpack.loads(resp.body))
|
|
91
91
|
except TimeoutError:
|
|
92
92
|
info = await self.get_dag_info(session_id, dag_id)
|
|
93
93
|
return info
|
|
@@ -103,7 +103,7 @@ class FrameDriverClient:
|
|
|
103
103
|
resp = await wait_http_response(
|
|
104
104
|
req_url, method="DELETE", request_timeout=timeout
|
|
105
105
|
)
|
|
106
|
-
info =
|
|
106
|
+
info = DagInfo.from_json(msgpack.loads(resp.body))
|
|
107
107
|
except TimeoutError:
|
|
108
108
|
info = await self.get_dag_info(session_id, dag_id)
|
|
109
109
|
return info
|
maxframe_client/session/odps.py
CHANGED
|
@@ -115,7 +115,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
115
115
|
):
|
|
116
116
|
super().__init__(address, session_id)
|
|
117
117
|
self.timeout = timeout
|
|
118
|
-
self._odps_entry = odps_entry or ODPS.from_environments()
|
|
118
|
+
self._odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
|
|
119
119
|
self._tileable_to_infos = weakref.WeakKeyDictionary()
|
|
120
120
|
|
|
121
121
|
self._caller = self._create_caller(odps_entry, address, **kwargs)
|
|
@@ -147,15 +147,16 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
147
147
|
data = t.op.get_data()
|
|
148
148
|
batch_size = options.session.upload_batch_size
|
|
149
149
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
150
|
+
if len(data):
|
|
151
|
+
halo_client = HaloTableIO(self._odps_entry)
|
|
152
|
+
with halo_client.open_writer(table_obj.full_table_name) as writer:
|
|
153
|
+
for batch_start in range(0, len(data), batch_size):
|
|
154
|
+
if isinstance(data, pd.Index):
|
|
155
|
+
batch = data[batch_start : batch_start + batch_size]
|
|
156
|
+
else:
|
|
157
|
+
batch = data.iloc[batch_start : batch_start + batch_size]
|
|
158
|
+
arrow_batch, _ = pandas_to_arrow(batch)
|
|
159
|
+
writer.write(arrow_batch)
|
|
159
160
|
|
|
160
161
|
read_tileable = read_odps_table(
|
|
161
162
|
table_obj.full_table_name,
|
maxframe_client/session/task.py
CHANGED
|
@@ -112,13 +112,19 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
112
112
|
odps_entry: ODPS,
|
|
113
113
|
task_name: Optional[str] = None,
|
|
114
114
|
project: Optional[str] = None,
|
|
115
|
-
priority: Optional[
|
|
115
|
+
priority: Optional[int] = None,
|
|
116
116
|
running_cluster: Optional[str] = None,
|
|
117
117
|
nested_instance_id: Optional[str] = None,
|
|
118
118
|
major_version: Optional[str] = None,
|
|
119
119
|
output_format: Optional[str] = None,
|
|
120
120
|
**kwargs,
|
|
121
121
|
):
|
|
122
|
+
if callable(odps_options.get_priority):
|
|
123
|
+
default_priority = odps_options.get_priority(odps_entry)
|
|
124
|
+
else:
|
|
125
|
+
default_priority = odps_options.priority
|
|
126
|
+
priority = priority if priority is not None else default_priority
|
|
127
|
+
|
|
122
128
|
self._odps_entry = odps_entry
|
|
123
129
|
self._task_name = task_name
|
|
124
130
|
self._project = project
|
|
@@ -126,6 +132,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
126
132
|
self._running_cluster = running_cluster
|
|
127
133
|
self._major_version = major_version
|
|
128
134
|
self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
|
|
135
|
+
|
|
129
136
|
if nested_instance_id is None:
|
|
130
137
|
self._nested = False
|
|
131
138
|
self._instance = None
|
|
maxframe_client/session/tests/test_task.py
CHANGED
|
@@ -18,6 +18,7 @@ import os
|
|
|
18
18
|
import mock
|
|
19
19
|
from defusedxml import ElementTree
|
|
20
20
|
from odps import ODPS
|
|
21
|
+
from odps import options as odps_options
|
|
21
22
|
|
|
22
23
|
from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
|
|
23
24
|
from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
|
|
@@ -27,17 +28,20 @@ expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
|
|
|
27
28
|
|
|
28
29
|
def test_maxframe_instance_caller_creating_session():
|
|
29
30
|
o = ODPS.from_environments()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
31
|
+
|
|
32
|
+
def create_caller(**kwargs):
|
|
33
|
+
kw = dict(
|
|
34
|
+
odps_entry=o,
|
|
35
|
+
task_name="task_test",
|
|
36
|
+
major_version="test_version",
|
|
37
|
+
output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
|
|
38
|
+
running_cluster="test_cluster",
|
|
39
|
+
)
|
|
40
|
+
kw.update(**kwargs)
|
|
41
|
+
return MaxFrameInstanceCaller(**kw)
|
|
38
42
|
|
|
39
43
|
def mock_create(self, task: MaxFrameTask, priority=None, running_cluster=None):
|
|
40
|
-
assert priority ==
|
|
44
|
+
assert priority == 100
|
|
41
45
|
assert running_cluster == "test_cluster"
|
|
42
46
|
root = ElementTree.parse(
|
|
43
47
|
os.path.join(expected_file_dir, "create_session.xml")
|
|
@@ -62,6 +66,20 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
62
66
|
target="maxframe_client.session.task.MaxFrameInstanceCaller",
|
|
63
67
|
_wait_instance_task_ready=mock.DEFAULT,
|
|
64
68
|
get_session=mock.DEFAULT,
|
|
65
|
-
):
|
|
66
|
-
|
|
69
|
+
), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
|
|
70
|
+
task_caller = create_caller(priority=100)
|
|
71
|
+
task_caller.create_session()
|
|
72
|
+
|
|
73
|
+
old_priority = odps_options.priority
|
|
74
|
+
old_get_priority = odps_options.get_priority
|
|
75
|
+
try:
|
|
76
|
+
task_caller = create_caller(priority=100)
|
|
77
|
+
odps_options.priority = 100
|
|
78
|
+
task_caller.create_session()
|
|
79
|
+
|
|
80
|
+
odps_options.priority = None
|
|
81
|
+
odps_options.get_priority = lambda _: 100
|
|
67
82
|
task_caller.create_session()
|
|
83
|
+
finally:
|
|
84
|
+
odps_options.priority = old_priority
|
|
85
|
+
odps_options.get_priority = old_get_priority
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -24,9 +24,11 @@ from odps import ODPS
|
|
|
24
24
|
import maxframe.dataframe as md
|
|
25
25
|
import maxframe.remote as mr
|
|
26
26
|
from maxframe.core import ExecutableTuple, TileableGraph
|
|
27
|
+
from maxframe.lib.aio import stop_isolation
|
|
27
28
|
from maxframe.protocol import ResultInfo
|
|
28
29
|
from maxframe.serialization import RemoteException
|
|
29
30
|
from maxframe.session import new_session
|
|
31
|
+
from maxframe.tests.utils import tn
|
|
30
32
|
from maxframe.utils import build_temp_table_name
|
|
31
33
|
from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
|
|
32
34
|
framedriver_app,
|
|
@@ -52,6 +54,7 @@ def start_mock_session(framedriver_app): # noqa: F811
|
|
|
52
54
|
time.sleep(5) # Wait for temp table deleted
|
|
53
55
|
else:
|
|
54
56
|
session.reset_default()
|
|
57
|
+
stop_isolation()
|
|
55
58
|
|
|
56
59
|
|
|
57
60
|
def test_simple_run_dataframe(start_mock_session):
|
|
@@ -113,6 +116,26 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
113
116
|
assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
114
117
|
|
|
115
118
|
|
|
119
|
+
def test_run_empty_table(start_mock_session):
|
|
120
|
+
odps_entry = ODPS.from_environments()
|
|
121
|
+
|
|
122
|
+
table_name = tn("test_session_empty_table")
|
|
123
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
124
|
+
empty_table = odps_entry.create_table(
|
|
125
|
+
table_name, "_idx_0 bigint, a double, b double", lifecycle=1
|
|
126
|
+
)
|
|
127
|
+
df = md.read_odps_table(table_name, index_col="_idx_0")
|
|
128
|
+
df["d"] = df["a"] + df["b"]
|
|
129
|
+
|
|
130
|
+
executed = df.execute()
|
|
131
|
+
assert "Index: []" in str(executed)
|
|
132
|
+
|
|
133
|
+
fetched = executed.fetch()
|
|
134
|
+
assert 0 == len(fetched)
|
|
135
|
+
|
|
136
|
+
empty_table.drop()
|
|
137
|
+
|
|
138
|
+
|
|
116
139
|
def test_run_dataframe_with_pd_source(start_mock_session):
|
|
117
140
|
odps_entry = ODPS.from_environments()
|
|
118
141
|
|
|
File without changes
|
|
File without changes
|