maxframe 1.0.0rc4__cp37-cp37m-win32.whl → 1.1.0__cp37-cp37m-win32.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (83)
  1. maxframe/_utils.cp37-win32.pyd +0 -0
  2. maxframe/config/config.py +3 -0
  3. maxframe/conftest.py +9 -2
  4. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  5. maxframe/core/operator/base.py +2 -0
  6. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  7. maxframe/dataframe/core.py +24 -2
  8. maxframe/dataframe/datasource/read_odps_query.py +63 -34
  9. maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
  10. maxframe/dataframe/extensions/__init__.py +5 -0
  11. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  12. maxframe/dataframe/extensions/flatjson.py +131 -0
  13. maxframe/dataframe/extensions/flatmap.py +28 -40
  14. maxframe/dataframe/extensions/reshuffle.py +1 -1
  15. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  16. maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
  17. maxframe/dataframe/groupby/__init__.py +1 -0
  18. maxframe/dataframe/groupby/aggregation.py +1 -0
  19. maxframe/dataframe/groupby/apply.py +9 -1
  20. maxframe/dataframe/groupby/core.py +1 -1
  21. maxframe/dataframe/groupby/fill.py +4 -1
  22. maxframe/dataframe/groupby/getitem.py +6 -0
  23. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  24. maxframe/dataframe/groupby/transform.py +8 -2
  25. maxframe/dataframe/indexing/loc.py +6 -4
  26. maxframe/dataframe/merge/__init__.py +9 -1
  27. maxframe/dataframe/merge/concat.py +41 -31
  28. maxframe/dataframe/merge/merge.py +1 -1
  29. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  30. maxframe/dataframe/misc/apply.py +3 -0
  31. maxframe/dataframe/misc/drop_duplicates.py +5 -1
  32. maxframe/dataframe/misc/map.py +3 -1
  33. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  34. maxframe/dataframe/misc/transform.py +22 -13
  35. maxframe/dataframe/reduction/__init__.py +3 -0
  36. maxframe/dataframe/reduction/aggregation.py +1 -0
  37. maxframe/dataframe/reduction/median.py +56 -0
  38. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  39. maxframe/dataframe/statistics/quantile.py +8 -2
  40. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  41. maxframe/dataframe/tests/test_utils.py +60 -0
  42. maxframe/dataframe/utils.py +110 -7
  43. maxframe/dataframe/window/expanding.py +5 -3
  44. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  45. maxframe/io/objects/tests/test_object_io.py +39 -12
  46. maxframe/io/odpsio/arrow.py +30 -2
  47. maxframe/io/odpsio/schema.py +23 -5
  48. maxframe/io/odpsio/tableio.py +26 -110
  49. maxframe/io/odpsio/tests/test_schema.py +40 -0
  50. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  51. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  52. maxframe/io/odpsio/volumeio.py +27 -3
  53. maxframe/learn/contrib/__init__.py +3 -2
  54. maxframe/learn/contrib/llm/__init__.py +16 -0
  55. maxframe/learn/contrib/llm/core.py +54 -0
  56. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  57. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  58. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  59. maxframe/learn/contrib/llm/text.py +42 -0
  60. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  61. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  62. maxframe/opcodes.py +7 -1
  63. maxframe/serialization/core.cp37-win32.pyd +0 -0
  64. maxframe/serialization/core.pyx +13 -1
  65. maxframe/serialization/pandas.py +50 -20
  66. maxframe/serialization/serializables/core.py +24 -5
  67. maxframe/serialization/serializables/field_type.py +4 -1
  68. maxframe/serialization/serializables/tests/test_serializable.py +8 -1
  69. maxframe/serialization/tests/test_serial.py +2 -1
  70. maxframe/tensor/__init__.py +19 -7
  71. maxframe/tests/utils.py +16 -0
  72. maxframe/udf.py +27 -0
  73. maxframe/utils.py +36 -8
  74. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
  75. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/RECORD +83 -72
  76. maxframe_client/clients/framedriver.py +4 -1
  77. maxframe_client/fetcher.py +18 -2
  78. maxframe_client/session/odps.py +23 -10
  79. maxframe_client/session/task.py +2 -24
  80. maxframe_client/session/tests/test_task.py +0 -4
  81. maxframe_client/tests/test_session.py +30 -10
  82. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/WHEEL +0 -0
  83. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@
14
14
 
15
15
  import abc
16
16
  import asyncio
17
+ import copy
17
18
  import logging
18
19
  import time
19
20
  import weakref
@@ -25,6 +26,7 @@ import numpy as np
25
26
  import pandas as pd
26
27
  from odps import ODPS
27
28
  from odps import options as odps_options
29
+ from odps.console import in_ipython_frontend
28
30
 
29
31
  from maxframe.config import options
30
32
  from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
@@ -113,6 +115,8 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
113
115
  mf_settings["session.quota_name"] = quota_name
114
116
  if enable_schema is not None:
115
117
  mf_settings["session.enable_schema"] = enable_schema
118
+ if options.session.enable_high_availability is None:
119
+ mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
116
120
  mf_settings["session.default_schema"] = default_schema or "default"
117
121
  return mf_settings
118
122
 
@@ -184,6 +188,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
184
188
 
185
189
  self._caller = self._create_caller(odps_entry, address, **kwargs)
186
190
  self._last_settings = None
191
+ self._pull_interval = 1 if in_ipython_frontend() else 3
192
+ self._replace_internal_host = kwargs.get("replace_internal_host", True)
187
193
 
188
194
  @classmethod
189
195
  def _create_caller(
@@ -193,7 +199,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
193
199
 
194
200
  async def _init(self, _address: str):
195
201
  session_info = await self.ensure_async_call(self._caller.create_session)
196
- self._last_settings = self._caller.get_settings_to_upload()
202
+ self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
197
203
  self._session_id = session_info.session_id
198
204
  await self._show_logview_address()
199
205
 
@@ -250,7 +256,12 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
250
256
  self, t: TileableType
251
257
  ) -> Optional[TileableType]:
252
258
  vol_name = build_session_volume_name(self.session_id)
253
- writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
259
+ writer = ODPSVolumeWriter(
260
+ self._odps_entry,
261
+ vol_name,
262
+ t.key,
263
+ replace_internal_host=self._replace_internal_host,
264
+ )
254
265
  io_handler = get_object_io_handler(t)
255
266
  io_handler().write_object(writer, t, t.op.data)
256
267
  return build_fetch(t).data
@@ -318,7 +329,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
318
329
  def _get_diff_settings(self) -> Dict[str, Any]:
319
330
  new_settings = self._caller.get_settings_to_upload()
320
331
  if not self._last_settings: # pragma: no cover
321
- self._last_settings = new_settings
332
+ self._last_settings = copy.deepcopy(new_settings)
322
333
  return new_settings
323
334
 
324
335
  update = dict()
@@ -330,7 +341,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
330
341
  update[k] = new_item
331
342
  except: # noqa: E722 # nosec # pylint: disable=bare-except
332
343
  update[k] = new_item
333
- self._last_settings = new_settings
344
+ self._last_settings = copy.deepcopy(new_settings)
334
345
  return update
335
346
 
336
347
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
@@ -379,18 +390,18 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
379
390
  start_time = time.time()
380
391
  session_id = dag_info.session_id
381
392
  dag_id = dag_info.dag_id
382
- wait_timeout = 10
383
393
  server_no_response_time = None
384
394
  with enter_mode(build=True, kernel=True):
385
395
  key_to_tileables = {t.key: t for t in tileables}
386
-
396
+ timeout_val = 0.1
387
397
  try:
388
398
  while True:
389
399
  elapsed_time = time.time() - start_time
400
+ next_timeout_val = min(timeout_val * 2, self._pull_interval)
390
401
  timeout_val = (
391
- min(self.timeout - elapsed_time, wait_timeout)
402
+ min(self.timeout - elapsed_time, next_timeout_val)
392
403
  if self.timeout
393
- else wait_timeout
404
+ else next_timeout_val
394
405
  )
395
406
  if timeout_val <= 0:
396
407
  raise TimeoutError("Running DAG timed out")
@@ -584,7 +595,9 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
584
595
  managed_input_infos: Dict[str, ResultInfo] = None,
585
596
  new_settings: Dict[str, Any] = None,
586
597
  ) -> DagInfo:
587
- return await self._client.submit_dag(self._session_id, dag, managed_input_infos)
598
+ return await self._client.submit_dag(
599
+ self._session_id, dag, managed_input_infos, new_settings=new_settings
600
+ )
588
601
 
589
602
  async def get_dag_info(self, dag_id: str) -> DagInfo:
590
603
  return await self._client.get_dag_info(self._session_id, dag_id)
@@ -617,7 +630,7 @@ class MaxFrameRestSession(MaxFrameSession):
617
630
  real_endpoint = address.replace(f"{parsed_endpoint.scheme}://", f"{scheme}://")
618
631
 
619
632
  super().__init__(
620
- real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout
633
+ real_endpoint, session_id, odps_entry=odps_entry, timeout=timeout, **kwargs
621
634
  )
622
635
 
623
636
  @classmethod
@@ -21,15 +21,9 @@ from typing import Any, Dict, List, Optional, Type, Union
21
21
  import msgpack
22
22
  from odps import ODPS
23
23
  from odps import options as odps_options
24
- from odps.errors import parse_instance_error
24
+ from odps.errors import EmptyTaskInfoError, parse_instance_error
25
25
  from odps.models import Instance, MaxFrameTask
26
26
 
27
- try:
28
- from odps.errors import EmptyTaskInfoError
29
- except ImportError: # pragma: no cover
30
- # todo remove when pyodps>=0.12.0 is enforced
31
- EmptyTaskInfoError = type("EmptyTaskInfoError", (Exception,), {})
32
-
33
27
  from maxframe.config import options
34
28
  from maxframe.core import TileableGraph
35
29
  from maxframe.errors import NoTaskServerResponseError, SessionAlreadyClosedError
@@ -131,11 +125,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
131
125
  )
132
126
 
133
127
  def _create_maxframe_task(self) -> MaxFrameTask:
134
- task = MaxFrameTask(
135
- name=self._task_name,
136
- major_version=self._major_version,
137
- service_endpoint=self._odps_entry.endpoint,
138
- )
128
+ task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
139
129
  mf_opts = {
140
130
  "odps.maxframe.settings": json.dumps(self.get_settings_to_upload()),
141
131
  "odps.maxframe.output_format": self._output_format,
@@ -200,18 +190,6 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
200
190
  json.dumps(json_data),
201
191
  raise_empty=True,
202
192
  )
203
- except TypeError: # pragma: no cover
204
- # todo remove when pyodps>=0.12.0 is enforced
205
- resp_data = self._instance.put_task_info(
206
- self._task_name, method_name, json.dumps(json_data)
207
- )
208
- if resp_data:
209
- return resp_data
210
- else:
211
- raise NoTaskServerResponseError(
212
- f"No response for request {method_name}. "
213
- f"Instance ID: {self._instance.id}"
214
- )
215
193
  except EmptyTaskInfoError as ex:
216
194
  # retry when server returns HTTP 204, which is designed for retry
217
195
  if ex.code != 204 or trial >= EMPTY_RESPONSE_RETRY_COUNT - 1:
@@ -53,10 +53,6 @@ def test_maxframe_instance_caller_creating_session():
53
53
  assert property_node.find("Name").text == "settings"
54
54
  setting_dict = json.loads(property_node.find("Value").text)
55
55
  assert setting_dict["odps.task.major.version"] == "test_version"
56
- assert (
57
- setting_dict["odps.service.endpoint"]
58
- == "http://100.69.248.78:8002/odps_dailyrunnew"
59
- )
60
56
 
61
57
  assert setting_dict["odps.maxframe.output_format"] == "json"
62
58
  maxframe_setting_dict = json.loads(setting_dict["odps.maxframe.settings"])
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import time
16
- from typing import Dict
16
+ from typing import Any, Dict
17
17
 
18
18
  import mock
19
19
  import numpy as np
@@ -31,7 +31,7 @@ from maxframe.lib.aio import stop_isolation
31
31
  from maxframe.protocol import ResultInfo
32
32
  from maxframe.serialization import RemoteException
33
33
  from maxframe.session import new_session
34
- from maxframe.tests.utils import tn
34
+ from maxframe.tests.utils import ensure_table_deleted, tn
35
35
  from maxframe.utils import build_temp_table_name
36
36
  from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
37
37
  framedriver_app,
@@ -86,9 +86,12 @@ def test_simple_run_dataframe(start_mock_session):
86
86
  session_id: str,
87
87
  dag: TileableGraph,
88
88
  managed_input_infos: Dict[str, ResultInfo] = None,
89
+ new_settings: Dict[str, Any] = None,
89
90
  ):
90
91
  assert len(dag) == 2
91
- return await original_submit_dag(self, session_id, dag, managed_input_infos)
92
+ return await original_submit_dag(
93
+ self, session_id, dag, managed_input_infos, new_settings
94
+ )
92
95
 
93
96
  no_task_server_raised = False
94
97
  original_get_dag_info = MaxFrameRestCaller.get_dag_info
@@ -130,11 +133,10 @@ def test_simple_run_dataframe(start_mock_session):
130
133
  )
131
134
  assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
132
135
  del df
133
- time.sleep(5)
134
- assert not odps_entry.exist_table(
135
- build_temp_table_name(start_mock_session, intermediate_key)
136
+ ensure_table_deleted(
137
+ odps_entry, build_temp_table_name(start_mock_session, intermediate_key)
136
138
  )
137
- assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
139
+ ensure_table_deleted(odps_entry, build_temp_table_name(start_mock_session, key))
138
140
 
139
141
 
140
142
  def test_run_and_fetch_slice(start_mock_session):
@@ -166,6 +168,25 @@ def test_run_empty_table(start_mock_session):
166
168
  empty_table.drop()
167
169
 
168
170
 
171
+ def test_run_odps_query_without_schema(start_mock_session):
172
+ odps_entry = ODPS.from_environments()
173
+
174
+ table_name = tn("test_query_without_schema")
175
+ odps_entry.delete_table(table_name, if_exists=True)
176
+ test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
177
+
178
+ with test_table.open_writer() as writer:
179
+ writer.write([123, 456])
180
+
181
+ df = md.read_odps_query(
182
+ f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
183
+ )
184
+ executed = df.execute().fetch()
185
+ assert len(executed.dtypes) == 3
186
+
187
+ test_table.drop()
188
+
189
+
169
190
  def test_run_dataframe_with_pd_source(start_mock_session):
170
191
  odps_entry = ODPS.from_environments()
171
192
 
@@ -246,9 +267,8 @@ def test_run_and_fetch_series(start_mock_session):
246
267
  pd.testing.assert_series_equal(pd_result, result)
247
268
 
248
269
  del s1
249
- time.sleep(5)
250
- assert not odps_entry.exist_table(
251
- build_temp_table_name(start_mock_session, src_key)
270
+ ensure_table_deleted(
271
+ odps_entry, build_temp_table_name(start_mock_session, src_key)
252
272
  )
253
273
  finally:
254
274
  odps_entry.delete_table(