maxframe 1.0.0rc4__cp310-cp310-macosx_10_9_universal2.whl → 1.1.1__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +26 -0
- maxframe/config/tests/test_config.py +20 -1
- maxframe/conftest.py +17 -4
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +24 -2
- maxframe/dataframe/datasource/read_odps_query.py +65 -35
- maxframe/dataframe/datasource/read_odps_table.py +4 -2
- maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +28 -40
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +5 -1
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/__init__.py +1 -1
- maxframe/io/odpsio/arrow.py +51 -2
- maxframe/io/odpsio/schema.py +23 -5
- maxframe/io/odpsio/tableio.py +80 -124
- maxframe/io/odpsio/tests/test_schema.py +40 -0
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +27 -3
- maxframe/learn/contrib/__init__.py +3 -2
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +7 -1
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +70 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +12 -2
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/merge/vstack.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +42 -8
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/METADATA +4 -4
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/RECORD +88 -77
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +23 -8
- maxframe_client/session/odps.py +40 -11
- maxframe_client/session/task.py +6 -25
- maxframe_client/session/tests/test_task.py +35 -6
- maxframe_client/tests/test_session.py +30 -10
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/top_level.txt +0 -0
maxframe_client/session/task.py
CHANGED
|
@@ -21,15 +21,9 @@ from typing import Any, Dict, List, Optional, Type, Union
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
23
23
|
from odps import options as odps_options
|
|
24
|
-
from odps.errors import parse_instance_error
|
|
24
|
+
from odps.errors import EmptyTaskInfoError, parse_instance_error
|
|
25
25
|
from odps.models import Instance, MaxFrameTask
|
|
26
26
|
|
|
27
|
-
try:
|
|
28
|
-
from odps.errors import EmptyTaskInfoError
|
|
29
|
-
except ImportError: # pragma: no cover
|
|
30
|
-
# todo remove when pyodps>=0.12.0 is enforced
|
|
31
|
-
EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
|
|
32
|
-
|
|
33
27
|
from maxframe.config import options
|
|
34
28
|
from maxframe.core import TileableGraph
|
|
35
29
|
from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
|
|
@@ -131,15 +125,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
131
125
|
)
|
|
132
126
|
|
|
133
127
|
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
134
|
-
task = MaxFrameTask(
|
|
135
|
-
|
|
136
|
-
major_version=self._major_version,
|
|
137
|
-
service_endpoint=self._odps_entry.endpoint,
|
|
138
|
-
)
|
|
128
|
+
task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
|
|
129
|
+
mf_settings = self.get_settings_to_upload()
|
|
139
130
|
mf_opts = {
|
|
140
|
-
"odps.maxframe.settings": json.dumps(
|
|
131
|
+
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
141
132
|
"odps.maxframe.output_format": self._output_format,
|
|
142
133
|
}
|
|
134
|
+
if mf_settings.get("session.quota_name", None):
|
|
135
|
+
mf_opts["odps.task.wlm.quota"] = mf_settings["session.quota_name"]
|
|
143
136
|
if mf_version:
|
|
144
137
|
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
145
138
|
task.update_settings(mf_opts)
|
|
@@ -200,18 +193,6 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
200
193
|
json.dumps(json_data),
|
|
201
194
|
raise_empty=True,
|
|
202
195
|
)
|
|
203
|
-
except TypeError: # pragma: no cover
|
|
204
|
-
# todo remove when pyodps>=0.12.0 is enforced
|
|
205
|
-
resp_data = self._instance.put_task_info(
|
|
206
|
-
self._task_name, method_name, json.dumps(json_data)
|
|
207
|
-
)
|
|
208
|
-
if resp_data:
|
|
209
|
-
return resp_data
|
|
210
|
-
else:
|
|
211
|
-
raise NoTaskServerResponseError(
|
|
212
|
-
f"No response for request {method_name}. "
|
|
213
|
-
f"Instance ID: {self._instance.id}"
|
|
214
|
-
)
|
|
215
196
|
except EmptyTaskInfoError as ex:
|
|
216
197
|
# retry when server returns HTTP 204, which is designed for retry
|
|
217
198
|
if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
|
|
@@ -11,17 +11,20 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
import json
|
|
16
15
|
import os
|
|
17
16
|
|
|
18
17
|
import mock
|
|
18
|
+
import pytest
|
|
19
19
|
from defusedxml import ElementTree
|
|
20
20
|
from odps import ODPS
|
|
21
21
|
from odps import options as odps_options
|
|
22
22
|
|
|
23
|
+
from maxframe import options
|
|
24
|
+
from maxframe.config import option_context
|
|
25
|
+
|
|
23
26
|
from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
|
|
24
|
-
from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
|
|
27
|
+
from ...session.task import MaxFrameInstanceCaller, MaxFrameTask, MaxFrameTaskSession
|
|
25
28
|
|
|
26
29
|
expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
|
|
27
30
|
|
|
@@ -53,10 +56,6 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
53
56
|
assert property_node.find("Name").text == "settings"
|
|
54
57
|
setting_dict = json.loads(property_node.find("Value").text)
|
|
55
58
|
assert setting_dict["odps.task.major.version"] == "test_version"
|
|
56
|
-
assert (
|
|
57
|
-
setting_dict["odps.service.endpoint"]
|
|
58
|
-
== "http://100.69.248.78:8002/odps_dailyrunnew"
|
|
59
|
-
)
|
|
60
59
|
|
|
61
60
|
assert setting_dict["odps.maxframe.output_format"] == "json"
|
|
62
61
|
maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
|
|
@@ -83,3 +82,33 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
83
82
|
finally:
|
|
84
83
|
odps_options.priority = old_priority
|
|
85
84
|
odps_options.get_priority = old_get_priority
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@pytest.mark.asyncio
|
|
88
|
+
async def test_session_quota_flag_valid():
|
|
89
|
+
def mock_create(self, task: MaxFrameTask, **kwargs):
|
|
90
|
+
assert task.properties["settings"]
|
|
91
|
+
task_settings = json.loads(task.properties["settings"])
|
|
92
|
+
assert task_settings["odps.task.wlm.quota"] == "session_quota"
|
|
93
|
+
|
|
94
|
+
with mock.patch.multiple(
|
|
95
|
+
target="maxframe_client.session.task.MaxFrameInstanceCaller",
|
|
96
|
+
_wait_instance_task_ready=mock.DEFAULT,
|
|
97
|
+
get_session=mock.DEFAULT,
|
|
98
|
+
get_logview_address=mock.DEFAULT,
|
|
99
|
+
), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
|
|
100
|
+
with option_context({"session.quota_name": "session_quota"}):
|
|
101
|
+
with pytest.raises(ValueError):
|
|
102
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
|
|
103
|
+
await MaxFrameTaskSession.init(
|
|
104
|
+
address="test", odps_entry=ODPS.from_environments()
|
|
105
|
+
)
|
|
106
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota"
|
|
107
|
+
mf_task_session = await MaxFrameTaskSession.init(
|
|
108
|
+
address="test", odps_entry=ODPS.from_environments()
|
|
109
|
+
)
|
|
110
|
+
with pytest.raises(ValueError):
|
|
111
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
|
|
112
|
+
mf_task_session._get_diff_settings()
|
|
113
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota"
|
|
114
|
+
mf_task_session._get_diff_settings()
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
|
-
from typing import Dict
|
|
16
|
+
from typing import Any, Dict
|
|
17
17
|
|
|
18
18
|
import mock
|
|
19
19
|
import numpy as np
|
|
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
|
|
|
31
31
|
from maxframe.protocol import ResultInfo
|
|
32
32
|
from maxframe.serialization import RemoteException
|
|
33
33
|
from maxframe.session import new_session
|
|
34
|
-
from maxframe.tests.utils import tn
|
|
34
|
+
from maxframe.tests.utils import ensure_table_deleted, tn
|
|
35
35
|
from maxframe.utils import build_temp_table_name
|
|
36
36
|
from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
|
|
37
37
|
framedriver_app,
|
|
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
86
86
|
session_id: str,
|
|
87
87
|
dag: TileableGraph,
|
|
88
88
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
89
|
+
new_settings: Dict[str, Any] = None,
|
|
89
90
|
):
|
|
90
91
|
assert len(dag) == 2
|
|
91
|
-
return await original_submit_dag(
|
|
92
|
+
return await original_submit_dag(
|
|
93
|
+
self, session_id, dag, managed_input_infos, new_settings
|
|
94
|
+
)
|
|
92
95
|
|
|
93
96
|
no_task_server_raised = False
|
|
94
97
|
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
@@ -130,11 +133,10 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
130
133
|
)
|
|
131
134
|
assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
132
135
|
del df
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
|
+
ensure_table_deleted(
|
|
137
|
+
odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
|
|
136
138
|
)
|
|
137
|
-
|
|
139
|
+
ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
|
|
138
140
|
|
|
139
141
|
|
|
140
142
|
def test_run_and_fetch_slice(start_mock_session):
|
|
@@ -166,6 +168,25 @@ def test_run_empty_table(start_mock_session):
|
|
|
166
168
|
empty_table.drop()
|
|
167
169
|
|
|
168
170
|
|
|
171
|
+
def test_run_odps_query_without_schema(start_mock_session):
|
|
172
|
+
odps_entry = ODPS.from_environments()
|
|
173
|
+
|
|
174
|
+
table_name = tn("test_query_without_schema")
|
|
175
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
176
|
+
test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
|
|
177
|
+
|
|
178
|
+
with test_table.open_writer() as writer:
|
|
179
|
+
writer.write([123, 456])
|
|
180
|
+
|
|
181
|
+
df = md.read_odps_query(
|
|
182
|
+
f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
|
|
183
|
+
)
|
|
184
|
+
executed = df.execute().fetch()
|
|
185
|
+
assert len(executed.dtypes) == 3
|
|
186
|
+
|
|
187
|
+
test_table.drop()
|
|
188
|
+
|
|
189
|
+
|
|
169
190
|
def test_run_dataframe_with_pd_source(start_mock_session):
|
|
170
191
|
odps_entry = ODPS.from_environments()
|
|
171
192
|
|
|
@@ -246,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
|
|
|
246
267
|
pd.testing.assert_series_equal(pd_result, result)
|
|
247
268
|
|
|
248
269
|
del s1
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
build_temp_table_name(start_mock_session, src_key)
|
|
270
|
+
ensure_table_deleted(
|
|
271
|
+
odps_entry, build_temp_table_name(start_mock_session, src_key)
|
|
252
272
|
)
|
|
253
273
|
finally:
|
|
254
274
|
odps_entry.delete_table(
|
|
File without changes
|