maxframe 1.0.0rc4__cp311-cp311-win_amd64.whl → 1.1.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp311-win_amd64.pyd +0 -0
- maxframe/config/config.py +3 -0
- maxframe/conftest.py +9 -2
- maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +24 -2
- maxframe/dataframe/datasource/read_odps_query.py +63 -34
- maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +28 -40
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +5 -1
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/arrow.py +30 -2
- maxframe/io/odpsio/schema.py +23 -5
- maxframe/io/odpsio/tableio.py +26 -110
- maxframe/io/odpsio/tests/test_schema.py +40 -0
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +27 -3
- maxframe/learn/contrib/__init__.py +3 -2
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +7 -1
- maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +24 -5
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +8 -1
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/tensor/__init__.py +19 -7
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +36 -8
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/RECORD +83 -72
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +18 -2
- maxframe_client/session/odps.py +23 -10
- maxframe_client/session/task.py +2 -24
- maxframe_client/session/tests/test_task.py +0 -4
- maxframe_client/tests/test_session.py +30 -10
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
maxframe_client/session/odps.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import abc
|
|
16
16
|
import asyncio
|
|
17
|
+
import copy
|
|
17
18
|
import logging
|
|
18
19
|
import time
|
|
19
20
|
import weakref
|
|
@@ -25,6 +26,7 @@ import numpy as np
|
|
|
25
26
|
import pandas as pd
|
|
26
27
|
from odps import ODPS
|
|
27
28
|
from odps import options as odps_options
|
|
29
|
+
from odps.console import in_ipython_frontend
|
|
28
30
|
|
|
29
31
|
from maxframe.config import options
|
|
30
32
|
from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
|
|
@@ -113,6 +115,8 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
113
115
|
mf_settings["session.quota_name"] = quota_name
|
|
114
116
|
if enable_schema is not None:
|
|
115
117
|
mf_settings["session.enable_schema"] = enable_schema
|
|
118
|
+
if options.session.enable_high_availability is None:
|
|
119
|
+
mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
|
|
116
120
|
mf_settings["session.default_schema"] = default_schema or "default"
|
|
117
121
|
return mf_settings
|
|
118
122
|
|
|
@@ -184,6 +188,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
184
188
|
|
|
185
189
|
self._caller = self._create_caller(odps_entry, address, **kwargs)
|
|
186
190
|
self._last_settings = None
|
|
191
|
+
self._pull_interval = 1 if in_ipython_frontend() else 3
|
|
192
|
+
self._replace_internal_host = kwargs.get("replace_internal_host", True)
|
|
187
193
|
|
|
188
194
|
@classmethod
|
|
189
195
|
def _create_caller(
|
|
@@ -193,7 +199,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
193
199
|
|
|
194
200
|
async def _init(self, _address: str):
|
|
195
201
|
session_info = await self.ensure_async_call(self._caller.create_session)
|
|
196
|
-
self._last_settings = self._caller.get_settings_to_upload()
|
|
202
|
+
self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
|
|
197
203
|
self._session_id = session_info.session_id
|
|
198
204
|
await self._show_logview_address()
|
|
199
205
|
|
|
@@ -250,7 +256,12 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
250
256
|
self, t: TileableType
|
|
251
257
|
) -> Optional[TileableType]:
|
|
252
258
|
vol_name = build_session_volume_name(self.session_id)
|
|
253
|
-
writer = ODPSVolumeWriter(
|
|
259
|
+
writer = ODPSVolumeWriter(
|
|
260
|
+
self._odps_entry,
|
|
261
|
+
vol_name,
|
|
262
|
+
t.key,
|
|
263
|
+
replace_internal_host=self._replace_internal_host,
|
|
264
|
+
)
|
|
254
265
|
io_handler = get_object_io_handler(t)
|
|
255
266
|
io_handler().write_object(writer, t, t.op.data)
|
|
256
267
|
return build_fetch(t).data
|
|
@@ -318,7 +329,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
318
329
|
def _get_diff_settings(self) -> Dict[str, Any]:
|
|
319
330
|
new_settings = self._caller.get_settings_to_upload()
|
|
320
331
|
if not self._last_settings: # pragma: no cover
|
|
321
|
-
self._last_settings = new_settings
|
|
332
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
322
333
|
return new_settings
|
|
323
334
|
|
|
324
335
|
update = dict()
|
|
@@ -330,7 +341,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
330
341
|
update[k] = new_item
|
|
331
342
|
except: # noqa: E722 # nosec # pylint: disable=bare-except
|
|
332
343
|
update[k] = new_item
|
|
333
|
-
self._last_settings = new_settings
|
|
344
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
334
345
|
return update
|
|
335
346
|
|
|
336
347
|
async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
|
|
@@ -379,18 +390,18 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
379
390
|
start_time = time.time()
|
|
380
391
|
session_id = dag_info.session_id
|
|
381
392
|
dag_id = dag_info.dag_id
|
|
382
|
-
wait_timeout = 10
|
|
383
393
|
server_no_response_time = None
|
|
384
394
|
with enter_mode(build=True, kernel=True):
|
|
385
395
|
key_to_tileables = {t.key: t for t in tileables}
|
|
386
|
-
|
|
396
|
+
timeout_val = 0.1
|
|
387
397
|
try:
|
|
388
398
|
while True:
|
|
389
399
|
elapsed_time = time.time() - start_time
|
|
400
|
+
next_timeout_val = min(timeout_val * 2, self._pull_interval)
|
|
390
401
|
timeout_val = (
|
|
391
|
-
min(self.timeout - elapsed_time, wait_timeout)
|
|
402
|
+
min(self.timeout - elapsed_time, next_timeout_val)
|
|
392
403
|
if self.timeout
|
|
393
|
-
else wait_timeout
|
|
404
|
+
else next_timeout_val
|
|
394
405
|
)
|
|
395
406
|
if timeout_val <= 0:
|
|
396
407
|
raise TimeoutError("Running DAG timed out")
|
|
@@ -584,7 +595,9 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
584
595
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
585
596
|
new_settings: Dict[str, Any] = None,
|
|
586
597
|
) -> DagInfo:
|
|
587
|
-
return await self._client.submit_dag(
|
|
598
|
+
return await self._client.submit_dag(
|
|
599
|
+
self._session_id, dag, managed_input_infos, new_settings=new_settings
|
|
600
|
+
)
|
|
588
601
|
|
|
589
602
|
async def get_dag_info(self, dag_id: str) -> DagInfo:
|
|
590
603
|
return await self._client.get_dag_info(self._session_id, dag_id)
|
|
@@ -617,7 +630,7 @@ class MaxFrameRestSession(MaxFrameSession):
|
|
|
617
630
|
real_endpoint = address.replace(f"{parsed_endpoint.scheme}://", f"{scheme}://")
|
|
618
631
|
|
|
619
632
|
super().__init__(
|
|
620
|
-
real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout
|
|
633
|
+
real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout, **kwargs
|
|
621
634
|
)
|
|
622
635
|
|
|
623
636
|
@classmethod
|
maxframe_client/session/task.py
CHANGED
|
@@ -21,15 +21,9 @@ from typing import Any, Dict, List, Optional, Type, Union
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
23
23
|
from odps import options as odps_options
|
|
24
|
-
from odps.errors import parse_instance_error
|
|
24
|
+
from odps.errors import EmptyTaskInfoError, parse_instance_error
|
|
25
25
|
from odps.models import Instance, MaxFrameTask
|
|
26
26
|
|
|
27
|
-
try:
|
|
28
|
-
from odps.errors import EmptyTaskInfoError
|
|
29
|
-
except ImportError: # pragma: no cover
|
|
30
|
-
# todo remove when pyodps>=0.12.0 is enforced
|
|
31
|
-
EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
|
|
32
|
-
|
|
33
27
|
from maxframe.config import options
|
|
34
28
|
from maxframe.core import TileableGraph
|
|
35
29
|
from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
|
|
@@ -131,11 +125,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
131
125
|
)
|
|
132
126
|
|
|
133
127
|
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
134
|
-
task = MaxFrameTask(
|
|
135
|
-
name=self._task_name,
|
|
136
|
-
major_version=self._major_version,
|
|
137
|
-
service_endpoint=self._odps_entry.endpoint,
|
|
138
|
-
)
|
|
128
|
+
task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
|
|
139
129
|
mf_opts = {
|
|
140
130
|
"odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
|
|
141
131
|
"odps.maxframe.output_format": self._output_format,
|
|
@@ -200,18 +190,6 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
200
190
|
json.dumps(json_data),
|
|
201
191
|
raise_empty=True,
|
|
202
192
|
)
|
|
203
|
-
except TypeError: # pragma: no cover
|
|
204
|
-
# todo remove when pyodps>=0.12.0 is enforced
|
|
205
|
-
resp_data = self._instance.put_task_info(
|
|
206
|
-
self._task_name, method_name, json.dumps(json_data)
|
|
207
|
-
)
|
|
208
|
-
if resp_data:
|
|
209
|
-
return resp_data
|
|
210
|
-
else:
|
|
211
|
-
raise NoTaskServerResponseError(
|
|
212
|
-
f"No response for request {method_name}. "
|
|
213
|
-
f"Instance ID: {self._instance.id}"
|
|
214
|
-
)
|
|
215
193
|
except EmptyTaskInfoError as ex:
|
|
216
194
|
# retry when server returns HTTP 204, which is designed for retry
|
|
217
195
|
if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
|
|
maxframe_client/session/tests/test_task.py
CHANGED
|
@@ -53,10 +53,6 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
53
53
|
assert property_node.find("Name").text == "settings"
|
|
54
54
|
setting_dict = json.loads(property_node.find("Value").text)
|
|
55
55
|
assert setting_dict["odps.task.major.version"] == "test_version"
|
|
56
|
-
assert (
|
|
57
|
-
setting_dict["odps.service.endpoint"]
|
|
58
|
-
== "http://100.69.248.78:8002/odps_dailyrunnew"
|
|
59
|
-
)
|
|
60
56
|
|
|
61
57
|
assert setting_dict["odps.maxframe.output_format"] == "json"
|
|
62
58
|
maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
|
-
from typing import Dict
|
|
16
|
+
from typing import Any, Dict
|
|
17
17
|
|
|
18
18
|
import mock
|
|
19
19
|
import numpy as np
|
|
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
|
|
|
31
31
|
from maxframe.protocol import ResultInfo
|
|
32
32
|
from maxframe.serialization import RemoteException
|
|
33
33
|
from maxframe.session import new_session
|
|
34
|
-
from maxframe.tests.utils import tn
|
|
34
|
+
from maxframe.tests.utils import ensure_table_deleted, tn
|
|
35
35
|
from maxframe.utils import build_temp_table_name
|
|
36
36
|
from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
|
|
37
37
|
framedriver_app,
|
|
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
86
86
|
session_id: str,
|
|
87
87
|
dag: TileableGraph,
|
|
88
88
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
89
|
+
new_settings: Dict[str, Any] = None,
|
|
89
90
|
):
|
|
90
91
|
assert len(dag) == 2
|
|
91
|
-
return await original_submit_dag(
|
|
92
|
+
return await original_submit_dag(
|
|
93
|
+
self, session_id, dag, managed_input_infos, new_settings
|
|
94
|
+
)
|
|
92
95
|
|
|
93
96
|
no_task_server_raised = False
|
|
94
97
|
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
@@ -130,11 +133,10 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
130
133
|
)
|
|
131
134
|
assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
132
135
|
del df
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
|
+
ensure_table_deleted(
|
|
137
|
+
odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
138
|
)
|
|
137
|
-
|
|
139
|
+
ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
|
|
138
140
|
|
|
139
141
|
|
|
140
142
|
def test_run_and_fetch_slice(start_mock_session):
|
|
@@ -166,6 +168,25 @@ def test_run_empty_table(start_mock_session):
|
|
|
166
168
|
empty_table.drop()
|
|
167
169
|
|
|
168
170
|
|
|
171
|
+
def test_run_odps_query_without_schema(start_mock_session):
|
|
172
|
+
odps_entry = ODPS.from_environments()
|
|
173
|
+
|
|
174
|
+
table_name = tn("test_query_without_schema")
|
|
175
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
176
|
+
test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
|
|
177
|
+
|
|
178
|
+
with test_table.open_writer() as writer:
|
|
179
|
+
writer.write([123, 456])
|
|
180
|
+
|
|
181
|
+
df = md.read_odps_query(
|
|
182
|
+
f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
|
|
183
|
+
)
|
|
184
|
+
executed = df.execute().fetch()
|
|
185
|
+
assert len(executed.dtypes) == 3
|
|
186
|
+
|
|
187
|
+
test_table.drop()
|
|
188
|
+
|
|
189
|
+
|
|
169
190
|
def test_run_dataframe_with_pd_source(start_mock_session):
|
|
170
191
|
odps_entry = ODPS.from_environments()
|
|
171
192
|
|
|
@@ -246,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
|
|
|
246
267
|
pd.testing.assert_series_equal(pd_result, result)
|
|
247
268
|
|
|
248
269
|
del s1
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
build_temp_table_name(start_mock_session, src_key)
|
|
270
|
+
ensure_table_deleted(
|
|
271
|
+
odps_entry, build_temp_table_name(start_mock_session, src_key)
|
|
252
272
|
)
|
|
253
273
|
finally:
|
|
254
274
|
odps_entry.delete_table(
|
|
File without changes
|