maxframe 0.1.0b4__cp38-cp38-win32.whl → 1.0.0rc1__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/codegen.py +56 -3
- maxframe/config/config.py +15 -1
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/core.py +30 -8
- maxframe/dataframe/datasource/read_odps_query.py +3 -1
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +3 -1
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +84 -0
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +2 -1
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/utils.py +7 -0
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/odpsio/arrow.py +2 -3
- maxframe/odpsio/tableio.py +22 -0
- maxframe/odpsio/tests/test_schema.py +16 -11
- maxframe/opcodes.py +3 -0
- maxframe/protocol.py +108 -10
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +119 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/session.py +28 -0
- maxframe/tensor/__init__.py +1 -1
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/__init__.py +2 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/datasource/array.py +4 -2
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +2 -2
- maxframe/udf.py +63 -3
- maxframe/utils.py +22 -13
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/METADATA +3 -3
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/RECORD +80 -61
- maxframe_client/__init__.py +0 -1
- maxframe_client/fetcher.py +65 -3
- maxframe_client/session/odps.py +74 -5
- maxframe_client/session/task.py +65 -71
- maxframe_client/tests/test_session.py +64 -1
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/top_level.txt +0 -0
maxframe_client/session/task.py
CHANGED
|
@@ -21,12 +21,12 @@ from typing import Dict, List, Optional, Type, Union
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
23
23
|
from odps import options as odps_options
|
|
24
|
-
from odps import serializers
|
|
25
24
|
from odps.errors import parse_instance_error
|
|
26
|
-
from odps.models import Instance, Task
|
|
25
|
+
from odps.models import Instance, MaxFrameTask
|
|
27
26
|
|
|
28
27
|
from maxframe.config import options
|
|
29
28
|
from maxframe.core import TileableGraph
|
|
29
|
+
from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
|
|
30
30
|
from maxframe.protocol import DagInfo, JsonSerializable, ResultInfo, SessionInfo
|
|
31
31
|
from maxframe.utils import deserialize_serializable, serialize_serializable, to_str
|
|
32
32
|
|
|
@@ -55,55 +55,6 @@ from .odps import MaxFrameServiceCaller, MaxFrameSession
|
|
|
55
55
|
logger = logging.getLogger(__name__)
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
class MaxFrameTask(Task):
|
|
59
|
-
__slots__ = ("_output_format", "_major_version", "_service_endpoint")
|
|
60
|
-
_root = "MaxFrame"
|
|
61
|
-
_anonymous_task_name = "AnonymousMaxFrameTask"
|
|
62
|
-
|
|
63
|
-
command = serializers.XMLNodeField("Command", default="CREATE_SESSION")
|
|
64
|
-
|
|
65
|
-
def __init__(self, **kwargs):
|
|
66
|
-
kwargs["name"] = kwargs.get("name") or self._anonymous_task_name
|
|
67
|
-
self._output_format = kwargs.pop(
|
|
68
|
-
"output_format", MAXFRAME_OUTPUT_MSGPACK_FORMAT
|
|
69
|
-
)
|
|
70
|
-
self._major_version = kwargs.pop("major_version", None)
|
|
71
|
-
self._service_endpoint = kwargs.pop("service_endpoint", None)
|
|
72
|
-
super().__init__(**kwargs)
|
|
73
|
-
|
|
74
|
-
def serial(self):
|
|
75
|
-
if self.properties is None:
|
|
76
|
-
self.properties = dict()
|
|
77
|
-
|
|
78
|
-
if odps_options.default_task_settings:
|
|
79
|
-
settings = odps_options.default_task_settings
|
|
80
|
-
else:
|
|
81
|
-
settings = dict()
|
|
82
|
-
|
|
83
|
-
if self._major_version is not None:
|
|
84
|
-
settings["odps.task.major.version"] = self._major_version
|
|
85
|
-
|
|
86
|
-
if "settings" in self.properties:
|
|
87
|
-
settings.update(json.loads(self.properties["settings"]))
|
|
88
|
-
|
|
89
|
-
# merge sql options
|
|
90
|
-
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
91
|
-
sql_settings.update(options.sql.settings or {})
|
|
92
|
-
|
|
93
|
-
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
94
|
-
mf_settings["sql.settings"] = sql_settings
|
|
95
|
-
mf_opts = {
|
|
96
|
-
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
97
|
-
"odps.maxframe.output_format": self._output_format,
|
|
98
|
-
"odps.service.endpoint": self._service_endpoint,
|
|
99
|
-
}
|
|
100
|
-
if mf_version:
|
|
101
|
-
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
102
|
-
settings.update(mf_opts)
|
|
103
|
-
self.properties["settings"] = json.dumps(settings)
|
|
104
|
-
return super().serial()
|
|
105
|
-
|
|
106
|
-
|
|
107
58
|
class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
108
59
|
_instance: Optional[Instance]
|
|
109
60
|
|
|
@@ -132,6 +83,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
132
83
|
self._running_cluster = running_cluster
|
|
133
84
|
self._major_version = major_version
|
|
134
85
|
self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
|
|
86
|
+
self._deleted = False
|
|
135
87
|
|
|
136
88
|
if nested_instance_id is None:
|
|
137
89
|
self._nested = False
|
|
@@ -144,10 +96,18 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
144
96
|
self, content: Union[bytes, str, dict], target_cls: Type[JsonSerializable]
|
|
145
97
|
):
|
|
146
98
|
if isinstance(content, (str, bytes)):
|
|
99
|
+
if len(content) == 0:
|
|
100
|
+
content = "{}"
|
|
147
101
|
json_data = json.loads(to_str(content))
|
|
148
102
|
else:
|
|
149
103
|
json_data = content
|
|
150
|
-
|
|
104
|
+
encoded_result = json_data.get("result")
|
|
105
|
+
if not encoded_result:
|
|
106
|
+
if self._deleted:
|
|
107
|
+
return None
|
|
108
|
+
else:
|
|
109
|
+
raise SessionAlreadyClosedError(self._instance.id)
|
|
110
|
+
result_data = base64.b64decode(encoded_result)
|
|
151
111
|
if self._output_format == MAXFRAME_OUTPUT_MAXFRAME_FORMAT:
|
|
152
112
|
return deserialize_serializable(result_data)
|
|
153
113
|
elif self._output_format == MAXFRAME_OUTPUT_JSON_FORMAT:
|
|
@@ -159,13 +119,31 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
159
119
|
f"Serialization format {self._output_format} not supported"
|
|
160
120
|
)
|
|
161
121
|
|
|
162
|
-
def create_session(self) -> SessionInfo:
|
|
122
|
+
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
163
123
|
task = MaxFrameTask(
|
|
164
124
|
name=self._task_name,
|
|
165
125
|
major_version=self._major_version,
|
|
166
|
-
output_format=self._output_format,
|
|
167
126
|
service_endpoint=self._odps_entry.endpoint,
|
|
168
127
|
)
|
|
128
|
+
|
|
129
|
+
# merge sql options
|
|
130
|
+
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
131
|
+
sql_settings.update(options.sql.settings or {})
|
|
132
|
+
|
|
133
|
+
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
134
|
+
mf_settings["sql.settings"] = sql_settings
|
|
135
|
+
|
|
136
|
+
mf_opts = {
|
|
137
|
+
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
138
|
+
"odps.maxframe.output_format": self._output_format,
|
|
139
|
+
}
|
|
140
|
+
if mf_version:
|
|
141
|
+
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
142
|
+
task.update_settings(mf_opts)
|
|
143
|
+
return task
|
|
144
|
+
|
|
145
|
+
def create_session(self) -> SessionInfo:
|
|
146
|
+
task = self._create_maxframe_task()
|
|
169
147
|
if not self._nested:
|
|
170
148
|
self._task_name = task.name
|
|
171
149
|
project = self._odps_entry.get_project(self._project)
|
|
@@ -210,6 +188,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
210
188
|
time.sleep(interval)
|
|
211
189
|
interval = min(max_interval, interval * 2)
|
|
212
190
|
|
|
191
|
+
def _put_task_info(self, method_name: str, json_data: dict):
|
|
192
|
+
resp_data = self._instance.put_task_info(
|
|
193
|
+
self._task_name, method_name, json.dumps(json_data)
|
|
194
|
+
)
|
|
195
|
+
if not resp_data:
|
|
196
|
+
raise NoTaskServerResponseError(f"No response for request {method_name}")
|
|
197
|
+
return resp_data
|
|
198
|
+
|
|
213
199
|
def get_session(self) -> SessionInfo:
|
|
214
200
|
req_data = {"output_format": self._output_format}
|
|
215
201
|
serialized = self._instance.put_task_info(
|
|
@@ -224,11 +210,8 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
224
210
|
self._instance.stop()
|
|
225
211
|
else:
|
|
226
212
|
req_data = {"output_format": self._output_format}
|
|
227
|
-
self._instance.put_task_info(
|
|
228
|
-
self._task_name,
|
|
229
|
-
MAXFRAME_TASK_DELETE_SESSION_METHOD,
|
|
230
|
-
json.dumps(req_data),
|
|
231
|
-
)
|
|
213
|
+
self._put_task_info(MAXFRAME_TASK_DELETE_SESSION_METHOD, req_data)
|
|
214
|
+
self._deleted = True
|
|
232
215
|
|
|
233
216
|
def submit_dag(
|
|
234
217
|
self,
|
|
@@ -243,9 +226,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
243
226
|
).decode(),
|
|
244
227
|
"output_format": self._output_format,
|
|
245
228
|
}
|
|
246
|
-
res = self._instance.put_task_info(
|
|
247
|
-
self._task_name, MAXFRAME_TASK_SUBMIT_DAG_METHOD, json.dumps(req_data)
|
|
248
|
-
)
|
|
229
|
+
res = self._put_task_info(MAXFRAME_TASK_SUBMIT_DAG_METHOD, req_data)
|
|
249
230
|
return self._deserial_task_info_result(res, DagInfo)
|
|
250
231
|
|
|
251
232
|
def get_dag_info(self, dag_id: str) -> DagInfo:
|
|
@@ -254,9 +235,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
254
235
|
"dag_id": dag_id,
|
|
255
236
|
"output_format": self._output_format,
|
|
256
237
|
}
|
|
257
|
-
res = self._instance.put_task_info(
|
|
258
|
-
self._task_name, MAXFRAME_TASK_GET_DAG_INFO_METHOD, json.dumps(req_data)
|
|
259
|
-
)
|
|
238
|
+
res = self._put_task_info(MAXFRAME_TASK_GET_DAG_INFO_METHOD, req_data)
|
|
260
239
|
return self._deserial_task_info_result(res, DagInfo)
|
|
261
240
|
|
|
262
241
|
def cancel_dag(self, dag_id: str) -> DagInfo:
|
|
@@ -265,18 +244,33 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
265
244
|
"dag_id": dag_id,
|
|
266
245
|
"output_format": self._output_format,
|
|
267
246
|
}
|
|
268
|
-
res = self._instance.put_task_info(
|
|
269
|
-
self._task_name, MAXFRAME_TASK_CANCEL_DAG_METHOD, json.dumps(req_data)
|
|
270
|
-
)
|
|
247
|
+
res = self._put_task_info(MAXFRAME_TASK_CANCEL_DAG_METHOD, req_data)
|
|
271
248
|
return self._deserial_task_info_result(res, DagInfo)
|
|
272
249
|
|
|
273
250
|
def decref(self, tileable_keys: List[str]) -> None:
|
|
274
251
|
req_data = {
|
|
275
252
|
"tileable_keys": ",".join(tileable_keys),
|
|
276
253
|
}
|
|
277
|
-
self._instance.put_task_info(
|
|
278
|
-
self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
|
|
279
|
-
)
|
|
254
|
+
self._put_task_info(MAXFRAME_TASK_DECREF_METHOD, req_data)
|
|
255
|
+
|
|
256
|
+
def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
|
|
257
|
+
"""
|
|
258
|
+
Generate logview address
|
|
259
|
+
|
|
260
|
+
Parameters
|
|
261
|
+
----------
|
|
262
|
+
dag_id: id of dag for which dag logview detail page to access
|
|
263
|
+
hours: hours of the logview address auth limit
|
|
264
|
+
Returns
|
|
265
|
+
-------
|
|
266
|
+
Logview address
|
|
267
|
+
"""
|
|
268
|
+
hours = hours or options.session.logview_hours
|
|
269
|
+
# notice: maxframe can't reuse subQuery else will conflict with mcqa when fetch resource data,
|
|
270
|
+
# added dagId for maxframe so logview backend will return maxframe data format if
|
|
271
|
+
# instance and dagId is provided.
|
|
272
|
+
dag_suffix = f"&dagId={dag_id}" if dag_id else ""
|
|
273
|
+
return self._instance.get_logview_address(hours) + dag_suffix
|
|
280
274
|
|
|
281
275
|
|
|
282
276
|
class MaxFrameTaskSession(MaxFrameSession):
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -23,7 +23,10 @@ from odps import ODPS
|
|
|
23
23
|
|
|
24
24
|
import maxframe.dataframe as md
|
|
25
25
|
import maxframe.remote as mr
|
|
26
|
+
from maxframe.config import options
|
|
27
|
+
from maxframe.config.config import option_context
|
|
26
28
|
from maxframe.core import ExecutableTuple, TileableGraph
|
|
29
|
+
from maxframe.errors import NoTaskServerResponseError
|
|
27
30
|
from maxframe.lib.aio import stop_isolation
|
|
28
31
|
from maxframe.protocol import ResultInfo
|
|
29
32
|
from maxframe.serialization import RemoteException
|
|
@@ -35,6 +38,7 @@ from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F4
|
|
|
35
38
|
)
|
|
36
39
|
|
|
37
40
|
from ..clients.framedriver import FrameDriverClient
|
|
41
|
+
from ..session.odps import MaxFrameRestCaller
|
|
38
42
|
|
|
39
43
|
pytestmark = pytest.mark.maxframe_engine(["MCSQL", "SPE"])
|
|
40
44
|
|
|
@@ -86,11 +90,25 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
86
90
|
assert len(dag) == 2
|
|
87
91
|
return await original_submit_dag(self, session_id, dag, managed_input_infos)
|
|
88
92
|
|
|
93
|
+
no_task_server_raised = False
|
|
94
|
+
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
95
|
+
|
|
96
|
+
async def patched_get_dag_info(self, dag_id: str):
|
|
97
|
+
nonlocal no_task_server_raised
|
|
98
|
+
|
|
99
|
+
if not no_task_server_raised:
|
|
100
|
+
no_task_server_raised = True
|
|
101
|
+
raise NoTaskServerResponseError
|
|
102
|
+
return await original_get_dag_info(self, dag_id)
|
|
103
|
+
|
|
89
104
|
df["H"] = "extra_content"
|
|
90
105
|
|
|
91
106
|
with mock.patch(
|
|
92
107
|
"maxframe_client.clients.framedriver.FrameDriverClient.submit_dag",
|
|
93
108
|
new=patched_submit_dag,
|
|
109
|
+
), mock.patch(
|
|
110
|
+
"maxframe_client.session.odps.MaxFrameRestCaller.get_dag_info",
|
|
111
|
+
new=patched_get_dag_info,
|
|
94
112
|
):
|
|
95
113
|
result = df.execute().fetch()
|
|
96
114
|
assert len(result) == 1000
|
|
@@ -99,9 +117,12 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
99
117
|
corner_top, corner_bottom = ExecutableTuple([df.iloc[:10], df.iloc[-10:]]).fetch()
|
|
100
118
|
assert len(corner_top) == len(corner_bottom) == 10
|
|
101
119
|
|
|
102
|
-
# check ellipsis mark in DataFrame
|
|
120
|
+
# check ellipsis mark in DataFrame reprs
|
|
103
121
|
df_str_repr = str(df)
|
|
104
122
|
assert ".." in df_str_repr
|
|
123
|
+
# check ellipsis mark in Series reprs
|
|
124
|
+
series_str_repr = str(df.A.execute())
|
|
125
|
+
assert ".." in series_str_repr
|
|
105
126
|
|
|
106
127
|
key = df.key
|
|
107
128
|
assert odps_entry.exist_table(
|
|
@@ -181,6 +202,24 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
|
|
|
181
202
|
odps_entry.delete_table(table_name, if_exists=True)
|
|
182
203
|
|
|
183
204
|
|
|
205
|
+
def test_create_session_with_options(framedriver_app): # noqa: F811
|
|
206
|
+
odps_entry = ODPS.from_environments()
|
|
207
|
+
framedriver_addr = f"mf://localhost:{framedriver_app.port}"
|
|
208
|
+
old_value = options.session.max_alive_seconds
|
|
209
|
+
session = None
|
|
210
|
+
try:
|
|
211
|
+
options.session.max_alive_seconds = 10
|
|
212
|
+
session = new_session(framedriver_addr, odps_entry=odps_entry)
|
|
213
|
+
session_id = session.session_id
|
|
214
|
+
session_conf = framedriver_app.session_manager.get_session_settings(session_id)
|
|
215
|
+
with option_context(session_conf) as session_options:
|
|
216
|
+
assert session_options.session.max_alive_seconds == 10
|
|
217
|
+
finally:
|
|
218
|
+
options.session.max_alive_seconds = old_value
|
|
219
|
+
if session is not None:
|
|
220
|
+
session.destroy()
|
|
221
|
+
|
|
222
|
+
|
|
184
223
|
def test_run_and_fetch_series(start_mock_session):
|
|
185
224
|
odps_entry = ODPS.from_environments()
|
|
186
225
|
|
|
@@ -226,3 +265,27 @@ def test_run_remote_error(start_mock_session):
|
|
|
226
265
|
|
|
227
266
|
with pytest.raises((ValueError, RemoteException)):
|
|
228
267
|
v.execute()
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def test_pivot_dataframe(start_mock_session):
|
|
271
|
+
pd_df = pd.DataFrame(
|
|
272
|
+
{
|
|
273
|
+
"A": "foo foo foo foo foo bar bar bar bar".split(),
|
|
274
|
+
"B": "one one one two two one one two two".split(),
|
|
275
|
+
"C": "small large large small small large small small large".split(),
|
|
276
|
+
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
|
|
277
|
+
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
|
|
278
|
+
}
|
|
279
|
+
)
|
|
280
|
+
df = md.DataFrame(pd_df)
|
|
281
|
+
pivot = df.pivot_table(values="D", index=["A", "B"], columns=["C"], aggfunc="sum")
|
|
282
|
+
executed = pivot.execute()
|
|
283
|
+
assert pivot.shape == (2, 4)
|
|
284
|
+
pd.testing.assert_index_equal(
|
|
285
|
+
pivot.dtypes.index, pd.Index(["large", "small"], name="C")
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
expected = pd_df.pivot_table(
|
|
289
|
+
values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
|
|
290
|
+
)
|
|
291
|
+
pd.testing.assert_frame_equal(executed.to_pandas(), expected)
|
maxframe_client/clients/spe.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from typing import Any, Dict, Optional
|
|
16
|
-
|
|
17
|
-
from tornado import httpclient
|
|
18
|
-
|
|
19
|
-
from maxframe.core import TileableGraph
|
|
20
|
-
from maxframe.protocol import ExecuteSubDagRequest, ProtocolBody, SubDagInfo
|
|
21
|
-
from maxframe.typing_ import TimeoutType
|
|
22
|
-
from maxframe.utils import (
|
|
23
|
-
deserialize_serializable,
|
|
24
|
-
format_timeout_params,
|
|
25
|
-
serialize_serializable,
|
|
26
|
-
wait_http_response,
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class SPEClient:
|
|
31
|
-
def __init__(
|
|
32
|
-
self,
|
|
33
|
-
endpoint: str,
|
|
34
|
-
session_id: Optional[str] = None,
|
|
35
|
-
host: str = None,
|
|
36
|
-
):
|
|
37
|
-
self._endpoint = endpoint.rstrip("/")
|
|
38
|
-
self._session_id = session_id
|
|
39
|
-
self._headers = {"Host": host}
|
|
40
|
-
|
|
41
|
-
@staticmethod
|
|
42
|
-
def _load_subdag_info(resp: httpclient.HTTPResponse) -> SubDagInfo:
|
|
43
|
-
res: ProtocolBody[SubDagInfo] = deserialize_serializable(resp.body)
|
|
44
|
-
return res.body
|
|
45
|
-
|
|
46
|
-
async def submit_subdag(
|
|
47
|
-
self, subdag: TileableGraph, settings: Dict[str, Any] = None
|
|
48
|
-
) -> SubDagInfo:
|
|
49
|
-
req_url = f"{self._endpoint}/api/subdags"
|
|
50
|
-
req_body: ProtocolBody[ExecuteSubDagRequest] = ProtocolBody(
|
|
51
|
-
body=ExecuteSubDagRequest(dag=subdag, settings=settings),
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
if self._session_id is not None:
|
|
55
|
-
req_url += f"?session_id={self._session_id}"
|
|
56
|
-
|
|
57
|
-
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
58
|
-
req_url,
|
|
59
|
-
method="POST",
|
|
60
|
-
headers=self._headers,
|
|
61
|
-
body=serialize_serializable(req_body),
|
|
62
|
-
)
|
|
63
|
-
return self._load_subdag_info(resp)
|
|
64
|
-
|
|
65
|
-
async def get_subdag_info(self, subdag_id: str) -> SubDagInfo:
|
|
66
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}?wait=0"
|
|
67
|
-
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
68
|
-
req_url,
|
|
69
|
-
method="GET",
|
|
70
|
-
headers=self._headers,
|
|
71
|
-
)
|
|
72
|
-
return self._load_subdag_info(resp)
|
|
73
|
-
|
|
74
|
-
async def wait_subdag(
|
|
75
|
-
self, subdag_id: str, wait_timeout: TimeoutType = None
|
|
76
|
-
) -> SubDagInfo:
|
|
77
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
|
|
78
|
-
params = format_timeout_params(wait_timeout)
|
|
79
|
-
try:
|
|
80
|
-
resp = await wait_http_response(
|
|
81
|
-
req_url + params,
|
|
82
|
-
method="GET",
|
|
83
|
-
headers=self._headers,
|
|
84
|
-
request_timeout=wait_timeout,
|
|
85
|
-
)
|
|
86
|
-
return self._load_subdag_info(resp)
|
|
87
|
-
except TimeoutError:
|
|
88
|
-
return await self.get_subdag_info(subdag_id)
|
|
89
|
-
|
|
90
|
-
async def cancel_subdag(
|
|
91
|
-
self, subdag_id: str, wait_timeout: TimeoutType = None
|
|
92
|
-
) -> SubDagInfo:
|
|
93
|
-
req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
|
|
94
|
-
params = format_timeout_params(wait_timeout)
|
|
95
|
-
try:
|
|
96
|
-
resp = await wait_http_response(
|
|
97
|
-
req_url + params,
|
|
98
|
-
method="DELETE",
|
|
99
|
-
headers=self._headers,
|
|
100
|
-
request_timeout=wait_timeout,
|
|
101
|
-
)
|
|
102
|
-
return self._load_subdag_info(resp)
|
|
103
|
-
except TimeoutError:
|
|
104
|
-
return await self.get_subdag_info(subdag_id)
|
|
File without changes
|
|
File without changes
|