maxframe 1.0.0rc4__cp310-cp310-macosx_10_9_universal2.whl → 1.1.1__cp310-cp310-macosx_10_9_universal2.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (88)
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/config/__init__.py +1 -1
  3. maxframe/config/config.py +26 -0
  4. maxframe/config/tests/test_config.py +20 -1
  5. maxframe/conftest.py +17 -4
  6. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  7. maxframe/core/operator/base.py +2 -0
  8. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  9. maxframe/dataframe/core.py +24 -2
  10. maxframe/dataframe/datasource/read_odps_query.py +65 -35
  11. maxframe/dataframe/datasource/read_odps_table.py +4 -2
  12. maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
  13. maxframe/dataframe/extensions/__init__.py +5 -0
  14. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  15. maxframe/dataframe/extensions/flatjson.py +131 -0
  16. maxframe/dataframe/extensions/flatmap.py +28 -40
  17. maxframe/dataframe/extensions/reshuffle.py +1 -1
  18. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  19. maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
  20. maxframe/dataframe/groupby/__init__.py +1 -0
  21. maxframe/dataframe/groupby/aggregation.py +1 -0
  22. maxframe/dataframe/groupby/apply.py +9 -1
  23. maxframe/dataframe/groupby/core.py +1 -1
  24. maxframe/dataframe/groupby/fill.py +4 -1
  25. maxframe/dataframe/groupby/getitem.py +6 -0
  26. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  27. maxframe/dataframe/groupby/transform.py +8 -2
  28. maxframe/dataframe/indexing/loc.py +6 -4
  29. maxframe/dataframe/merge/__init__.py +9 -1
  30. maxframe/dataframe/merge/concat.py +41 -31
  31. maxframe/dataframe/merge/merge.py +1 -1
  32. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  33. maxframe/dataframe/misc/apply.py +3 -0
  34. maxframe/dataframe/misc/drop_duplicates.py +5 -1
  35. maxframe/dataframe/misc/map.py +3 -1
  36. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  37. maxframe/dataframe/misc/transform.py +22 -13
  38. maxframe/dataframe/reduction/__init__.py +3 -0
  39. maxframe/dataframe/reduction/aggregation.py +1 -0
  40. maxframe/dataframe/reduction/median.py +56 -0
  41. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  42. maxframe/dataframe/statistics/quantile.py +8 -2
  43. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  44. maxframe/dataframe/tests/test_utils.py +60 -0
  45. maxframe/dataframe/utils.py +110 -7
  46. maxframe/dataframe/window/expanding.py +5 -3
  47. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  48. maxframe/io/objects/tests/test_object_io.py +39 -12
  49. maxframe/io/odpsio/__init__.py +1 -1
  50. maxframe/io/odpsio/arrow.py +51 -2
  51. maxframe/io/odpsio/schema.py +23 -5
  52. maxframe/io/odpsio/tableio.py +80 -124
  53. maxframe/io/odpsio/tests/test_schema.py +40 -0
  54. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  55. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  56. maxframe/io/odpsio/volumeio.py +27 -3
  57. maxframe/learn/contrib/__init__.py +3 -2
  58. maxframe/learn/contrib/llm/__init__.py +16 -0
  59. maxframe/learn/contrib/llm/core.py +54 -0
  60. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  61. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  62. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  63. maxframe/learn/contrib/llm/text.py +42 -0
  64. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  65. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  66. maxframe/opcodes.py +7 -1
  67. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  68. maxframe/serialization/core.pyx +13 -1
  69. maxframe/serialization/pandas.py +50 -20
  70. maxframe/serialization/serializables/core.py +70 -15
  71. maxframe/serialization/serializables/field_type.py +4 -1
  72. maxframe/serialization/serializables/tests/test_serializable.py +12 -2
  73. maxframe/serialization/tests/test_serial.py +2 -1
  74. maxframe/tensor/__init__.py +19 -7
  75. maxframe/tensor/merge/vstack.py +1 -1
  76. maxframe/tests/utils.py +16 -0
  77. maxframe/udf.py +27 -0
  78. maxframe/utils.py +42 -8
  79. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/METADATA +4 -4
  80. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/RECORD +88 -77
  81. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/WHEEL +1 -1
  82. maxframe_client/clients/framedriver.py +4 -1
  83. maxframe_client/fetcher.py +23 -8
  84. maxframe_client/session/odps.py +40 -11
  85. maxframe_client/session/task.py +6 -25
  86. maxframe_client/session/tests/test_task.py +35 -6
  87. maxframe_client/tests/test_session.py +30 -10
  88. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/top_level.txt +0 -0
@@ -21,15 +21,9 @@ from typing import Any, Dict, List, Optional, Type, Union
21
21
  import msgpack
22
22
  from odps import ODPS
23
23
  from odps import options as odps_options
24
- from odps.errors import parse_instance_error
24
+ from odps.errors import EmptyTaskInfoError, parse_instance_error
25
25
  from odps.models import Instance, MaxFrameTask
26
26
 
27
- try:
28
- from odps.errors import EmptyTaskInfoError
29
- except ImportError: # pragma: no cover
30
- # todo remove when pyodps>=0.12.0 is enforced
31
- EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
32
-
33
27
  from maxframe.config import options
34
28
  from maxframe.core import TileableGraph
35
29
  from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
@@ -131,15 +125,14 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
131
125
  )
132
126
 
133
127
  def _create_maxframe_task(self) -> MaxFrameTask:
134
- task = MaxFrameTask(
135
- name=self._task_name,
136
- major_version=self._major_version,
137
- service_endpoint=self._odps_entry.endpoint,
138
- )
128
+ task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
129
+ mf_settings = self.get_settings_to_upload()
139
130
  mf_opts = {
140
- "odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
131
+ "odps.maxframe.settings": json.dumps(mf_settings),
141
132
  "odps.maxframe.output_format": self._output_format,
142
133
  }
134
+ if mf_settings.get("session.quota_name", None):
135
+ mf_opts["odps.task.wlm.quota"] = mf_settings["session.quota_name"]
143
136
  if mf_version:
144
137
  mf_opts["odps.maxframe.client_version"] = mf_version
145
138
  task.update_settings(mf_opts)
@@ -200,18 +193,6 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
200
193
  json.dumps(json_data),
201
194
  raise_empty=True,
202
195
  )
203
- except TypeError: # pragma: no cover
204
- # todo remove when pyodps>=0.12.0 is enforced
205
- resp_data = self._instance.put_task_info(
206
- self._task_name, method_name, json.dumps(json_data)
207
- )
208
- if resp_data:
209
- return resp_data
210
- else:
211
- raise NoTaskServerResponseError(
212
- f"No response for request {method_name}. "
213
- f"Instance ID: {self._instance.id}"
214
- )
215
196
  except EmptyTaskInfoError as ex:
216
197
  # retry when server returns HTTP 204, which is designed for retry
217
198
  if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
@@ -11,17 +11,20 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
15
14
  import json
16
15
  import os
17
16
 
18
17
  import mock
18
+ import pytest
19
19
  from defusedxml import ElementTree
20
20
  from odps import ODPS
21
21
  from odps import options as odps_options
22
22
 
23
+ from maxframe import options
24
+ from maxframe.config import option_context
25
+
23
26
  from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
24
- from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
27
+ from ...session.task import MaxFrameInstanceCaller, MaxFrameTask, MaxFrameTaskSession
25
28
 
26
29
  expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
27
30
 
@@ -53,10 +56,6 @@ def test_maxframe_instance_caller_creating_session():
53
56
  assert property_node.find("Name").text == "settings"
54
57
  setting_dict = json.loads(property_node.find("Value").text)
55
58
  assert setting_dict["odps.task.major.version"] == "test_version"
56
- assert (
57
- setting_dict["odps.service.endpoint"]
58
- == "http://100.69.248.78:8002/odps_dailyrunnew"
59
- )
60
59
 
61
60
  assert setting_dict["odps.maxframe.output_format"] == "json"
62
61
  maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
@@ -83,3 +82,33 @@ def test_maxframe_instance_caller_creating_session():
83
82
  finally:
84
83
  odps_options.priority = old_priority
85
84
  odps_options.get_priority = old_get_priority
85
+
86
+
87
+ @pytest.mark.asyncio
88
+ async def test_session_quota_flag_valid():
89
+ def mock_create(self, task: MaxFrameTask, **kwargs):
90
+ assert task.properties["settings"]
91
+ task_settings = json.loads(task.properties["settings"])
92
+ assert task_settings["odps.task.wlm.quota"] == "session_quota"
93
+
94
+ with mock.patch.multiple(
95
+ target="maxframe_client.session.task.MaxFrameInstanceCaller",
96
+ _wait_instance_task_ready=mock.DEFAULT,
97
+ get_session=mock.DEFAULT,
98
+ get_logview_address=mock.DEFAULT,
99
+ ), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
100
+ with option_context({"session.quota_name": "session_quota"}):
101
+ with pytest.raises(ValueError):
102
+ options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
103
+ await MaxFrameTaskSession.init(
104
+ address="test", odps_entry=ODPS.from_environments()
105
+ )
106
+ options.sql.settings["odps.task.wlm.quota"] = "session_quota"
107
+ mf_task_session = await MaxFrameTaskSession.init(
108
+ address="test", odps_entry=ODPS.from_environments()
109
+ )
110
+ with pytest.raises(ValueError):
111
+ options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
112
+ mf_task_session._get_diff_settings()
113
+ options.sql.settings["odps.task.wlm.quota"] = "session_quota"
114
+ mf_task_session._get_diff_settings()
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import time
16
- from typing import Dict
16
+ from typing import Any, Dict
17
17
 
18
18
  import mock
19
19
  import numpy as np
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
31
31
  from maxframe.protocol import ResultInfo
32
32
  from maxframe.serialization import RemoteException
33
33
  from maxframe.session import new_session
34
- from maxframe.tests.utils import tn
34
+ from maxframe.tests.utils import ensure_table_deleted, tn
35
35
  from maxframe.utils import build_temp_table_name
36
36
  from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
37
37
  framedriver_app,
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
86
86
  session_id: str,
87
87
  dag: TileableGraph,
88
88
  managed_input_infos: Dict[str, ResultInfo] = None,
89
+ new_settings: Dict[str, Any] = None,
89
90
  ):
90
91
  assert len(dag) == 2
91
- return await original_submit_dag(self, session_id, dag, managed_input_infos)
92
+ return await original_submit_dag(
93
+ self, session_id, dag, managed_input_infos, new_settings
94
+ )
92
95
 
93
96
  no_task_server_raised = False
94
97
  original_get_dag_info = MaxFrameRestCaller.get_dag_info
@@ -130,11 +133,10 @@ def test_simple_run_dataframe(start_mock_session):
130
133
  )
131
134
  assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
132
135
  del df
133
- time.sleep(5)
134
- assert not odps_entry.exist_table(
135
- build_temp_table_name(start_mock_session, intermediate_key)
136
+ ensure_table_deleted(
137
+ odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
136
138
  )
137
- assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
139
+ ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
138
140
 
139
141
 
140
142
  def test_run_and_fetch_slice(start_mock_session):
@@ -166,6 +168,25 @@ def test_run_empty_table(start_mock_session):
166
168
  empty_table.drop()
167
169
 
168
170
 
171
+ def test_run_odps_query_without_schema(start_mock_session):
172
+ odps_entry = ODPS.from_environments()
173
+
174
+ table_name = tn("test_query_without_schema")
175
+ odps_entry.delete_table(table_name, if_exists=True)
176
+ test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
177
+
178
+ with test_table.open_writer() as writer:
179
+ writer.write([123, 456])
180
+
181
+ df = md.read_odps_query(
182
+ f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
183
+ )
184
+ executed = df.execute().fetch()
185
+ assert len(executed.dtypes) == 3
186
+
187
+ test_table.drop()
188
+
189
+
169
190
  def test_run_dataframe_with_pd_source(start_mock_session):
170
191
  odps_entry = ODPS.from_environments()
171
192
 
@@ -246,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
246
267
  pd.testing.assert_series_equal(pd_result, result)
247
268
 
248
269
  del s1
249
- time.sleep(5)
250
- assert not odps_entry.exist_table(
251
- build_temp_table_name(start_mock_session, src_key)
270
+ ensure_table_deleted(
271
+ odps_entry, build_temp_table_name(start_mock_session, src_key)
252
272
  )
253
273
  finally:
254
274
  odps_entry.delete_table(