maxframe 0.1.0b5__cp39-cp39-macosx_10_9_universal2.whl → 1.0.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (203)
  1. maxframe/_utils.cpython-39-darwin.so +0 -0
  2. maxframe/codegen.py +10 -4
  3. maxframe/config/config.py +68 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +58 -14
  6. maxframe/core/__init__.py +2 -16
  7. maxframe/core/entity/__init__.py +1 -12
  8. maxframe/core/entity/executable.py +1 -1
  9. maxframe/core/entity/objects.py +46 -45
  10. maxframe/core/entity/output_types.py +0 -3
  11. maxframe/core/entity/tests/test_objects.py +43 -0
  12. maxframe/core/entity/tileables.py +5 -78
  13. maxframe/core/graph/__init__.py +2 -2
  14. maxframe/core/graph/builder/__init__.py +0 -1
  15. maxframe/core/graph/builder/base.py +5 -4
  16. maxframe/core/graph/builder/tileable.py +4 -4
  17. maxframe/core/graph/builder/utils.py +4 -8
  18. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  19. maxframe/core/graph/core.pyx +4 -4
  20. maxframe/core/graph/entity.py +9 -33
  21. maxframe/core/operator/__init__.py +2 -9
  22. maxframe/core/operator/base.py +3 -5
  23. maxframe/core/operator/objects.py +0 -9
  24. maxframe/core/operator/utils.py +55 -0
  25. maxframe/dataframe/__init__.py +1 -1
  26. maxframe/dataframe/arithmetic/around.py +5 -17
  27. maxframe/dataframe/arithmetic/core.py +15 -7
  28. maxframe/dataframe/arithmetic/docstring.py +7 -33
  29. maxframe/dataframe/arithmetic/equal.py +4 -2
  30. maxframe/dataframe/arithmetic/greater.py +4 -2
  31. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  32. maxframe/dataframe/arithmetic/less.py +2 -2
  33. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  34. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  36. maxframe/dataframe/core.py +31 -7
  37. maxframe/dataframe/datasource/date_range.py +2 -2
  38. maxframe/dataframe/datasource/read_odps_query.py +117 -23
  39. maxframe/dataframe/datasource/read_odps_table.py +6 -3
  40. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  41. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  42. maxframe/dataframe/datastore/to_odps.py +28 -0
  43. maxframe/dataframe/extensions/__init__.py +5 -0
  44. maxframe/dataframe/extensions/flatjson.py +131 -0
  45. maxframe/dataframe/extensions/flatmap.py +317 -0
  46. maxframe/dataframe/extensions/reshuffle.py +1 -1
  47. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  48. maxframe/dataframe/groupby/core.py +1 -1
  49. maxframe/dataframe/groupby/cum.py +0 -1
  50. maxframe/dataframe/groupby/fill.py +4 -1
  51. maxframe/dataframe/groupby/getitem.py +6 -0
  52. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  53. maxframe/dataframe/groupby/transform.py +5 -1
  54. maxframe/dataframe/indexing/align.py +1 -1
  55. maxframe/dataframe/indexing/loc.py +6 -4
  56. maxframe/dataframe/indexing/rename.py +5 -28
  57. maxframe/dataframe/indexing/sample.py +0 -1
  58. maxframe/dataframe/indexing/set_index.py +68 -1
  59. maxframe/dataframe/initializer.py +11 -1
  60. maxframe/dataframe/merge/__init__.py +9 -1
  61. maxframe/dataframe/merge/concat.py +41 -31
  62. maxframe/dataframe/merge/merge.py +237 -3
  63. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  64. maxframe/dataframe/misc/apply.py +5 -10
  65. maxframe/dataframe/misc/case_when.py +1 -1
  66. maxframe/dataframe/misc/describe.py +2 -2
  67. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  68. maxframe/dataframe/misc/eval.py +4 -0
  69. maxframe/dataframe/misc/memory_usage.py +2 -2
  70. maxframe/dataframe/misc/pct_change.py +1 -83
  71. maxframe/dataframe/misc/tests/test_misc.py +33 -2
  72. maxframe/dataframe/misc/transform.py +1 -30
  73. maxframe/dataframe/misc/value_counts.py +4 -17
  74. maxframe/dataframe/missing/dropna.py +1 -1
  75. maxframe/dataframe/missing/fillna.py +5 -5
  76. maxframe/dataframe/operators.py +1 -17
  77. maxframe/dataframe/reduction/core.py +2 -2
  78. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  79. maxframe/dataframe/sort/sort_values.py +1 -11
  80. maxframe/dataframe/statistics/corr.py +3 -3
  81. maxframe/dataframe/statistics/quantile.py +13 -19
  82. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  83. maxframe/dataframe/tests/test_initializer.py +33 -2
  84. maxframe/dataframe/utils.py +26 -11
  85. maxframe/dataframe/window/expanding.py +5 -3
  86. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  87. maxframe/errors.py +13 -0
  88. maxframe/extension.py +12 -0
  89. maxframe/io/__init__.py +13 -0
  90. maxframe/io/objects/__init__.py +24 -0
  91. maxframe/io/objects/core.py +140 -0
  92. maxframe/io/objects/tensor.py +76 -0
  93. maxframe/io/objects/tests/__init__.py +13 -0
  94. maxframe/io/objects/tests/test_object_io.py +97 -0
  95. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  96. maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
  97. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  98. maxframe/io/odpsio/tableio.py +719 -0
  99. maxframe/io/odpsio/tests/__init__.py +13 -0
  100. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
  101. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  102. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  103. maxframe/io/odpsio/volumeio.py +63 -0
  104. maxframe/learn/contrib/__init__.py +3 -1
  105. maxframe/learn/contrib/graph/__init__.py +15 -0
  106. maxframe/learn/contrib/graph/connected_components.py +215 -0
  107. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  108. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  109. maxframe/learn/contrib/llm/__init__.py +16 -0
  110. maxframe/learn/contrib/llm/core.py +54 -0
  111. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  112. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  113. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  114. maxframe/learn/contrib/llm/text.py +42 -0
  115. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  116. maxframe/learn/contrib/xgboost/core.py +87 -2
  117. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  118. maxframe/learn/contrib/xgboost/predict.py +29 -46
  119. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  120. maxframe/learn/contrib/xgboost/train.py +29 -18
  121. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  122. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  123. maxframe/lib/mmh3.pyi +43 -0
  124. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  125. maxframe/lib/wrapped_pickle.py +2 -1
  126. maxframe/opcodes.py +8 -0
  127. maxframe/protocol.py +154 -27
  128. maxframe/remote/core.py +4 -8
  129. maxframe/serialization/__init__.py +1 -0
  130. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  131. maxframe/serialization/core.pxd +3 -0
  132. maxframe/serialization/core.pyi +3 -0
  133. maxframe/serialization/core.pyx +67 -26
  134. maxframe/serialization/exception.py +1 -1
  135. maxframe/serialization/pandas.py +52 -17
  136. maxframe/serialization/serializables/core.py +180 -15
  137. maxframe/serialization/serializables/field_type.py +4 -1
  138. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  139. maxframe/serialization/tests/test_serial.py +2 -1
  140. maxframe/session.py +9 -2
  141. maxframe/tensor/__init__.py +81 -2
  142. maxframe/tensor/arithmetic/isclose.py +1 -0
  143. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  144. maxframe/tensor/core.py +5 -136
  145. maxframe/tensor/datasource/array.py +3 -0
  146. maxframe/tensor/datasource/full.py +1 -1
  147. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  148. maxframe/tensor/indexing/flatnonzero.py +1 -1
  149. maxframe/tensor/indexing/getitem.py +2 -0
  150. maxframe/tensor/merge/__init__.py +2 -0
  151. maxframe/tensor/merge/concatenate.py +101 -0
  152. maxframe/tensor/merge/tests/test_merge.py +30 -1
  153. maxframe/tensor/merge/vstack.py +74 -0
  154. maxframe/tensor/{base → misc}/__init__.py +2 -0
  155. maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
  156. maxframe/tensor/misc/atleast_2d.py +70 -0
  157. maxframe/tensor/misc/atleast_3d.py +85 -0
  158. maxframe/tensor/misc/tests/__init__.py +13 -0
  159. maxframe/tensor/{base → misc}/transpose.py +22 -18
  160. maxframe/tensor/{base → misc}/unique.py +3 -3
  161. maxframe/tensor/operators.py +1 -7
  162. maxframe/tensor/random/core.py +1 -1
  163. maxframe/tensor/reduction/count_nonzero.py +2 -1
  164. maxframe/tensor/reduction/mean.py +1 -0
  165. maxframe/tensor/reduction/nanmean.py +1 -0
  166. maxframe/tensor/reduction/nanvar.py +2 -0
  167. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  168. maxframe/tensor/reduction/var.py +2 -0
  169. maxframe/tensor/statistics/quantile.py +2 -2
  170. maxframe/tensor/utils.py +2 -22
  171. maxframe/tests/test_protocol.py +34 -0
  172. maxframe/tests/test_utils.py +0 -12
  173. maxframe/tests/utils.py +17 -2
  174. maxframe/typing_.py +4 -1
  175. maxframe/udf.py +8 -9
  176. maxframe/utils.py +106 -86
  177. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  178. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
  179. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  180. maxframe_client/__init__.py +0 -1
  181. maxframe_client/clients/framedriver.py +4 -1
  182. maxframe_client/fetcher.py +81 -74
  183. maxframe_client/session/consts.py +3 -0
  184. maxframe_client/session/graph.py +8 -2
  185. maxframe_client/session/odps.py +194 -40
  186. maxframe_client/session/task.py +94 -39
  187. maxframe_client/tests/test_fetcher.py +21 -3
  188. maxframe_client/tests/test_session.py +109 -8
  189. maxframe/core/entity/chunks.py +0 -68
  190. maxframe/core/entity/fuse.py +0 -73
  191. maxframe/core/graph/builder/chunk.py +0 -430
  192. maxframe/odpsio/tableio.py +0 -322
  193. maxframe/odpsio/volumeio.py +0 -95
  194. maxframe_client/clients/spe.py +0 -104
  195. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  196. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  197. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  198. /maxframe/tensor/{base → misc}/astype.py +0 -0
  199. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  200. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  201. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  202. /maxframe/tensor/{base → misc}/where.py +0 -0
  203. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/tests/utils.py CHANGED
@@ -14,6 +14,7 @@
14
14
 
15
15
  import asyncio
16
16
  import functools
17
+ import hashlib
17
18
  import os
18
19
  import queue
19
20
  import socket
@@ -25,7 +26,7 @@ import pytest
25
26
  from tornado import netutil
26
27
 
27
28
  from ..core import Tileable, TileableGraph
28
- from ..utils import lazy_import
29
+ from ..utils import create_sync_primitive, lazy_import, to_binary
29
30
 
30
31
  try:
31
32
  from flaky import flaky
@@ -102,7 +103,7 @@ def run_app_in_thread(app_func):
102
103
  def fixture_func(*args, **kwargs):
103
104
  app_loop = asyncio.new_event_loop()
104
105
  q = queue.Queue()
105
- exit_event = asyncio.Event(loop=app_loop)
106
+ exit_event = create_sync_primitive(asyncio.Event, app_loop)
106
107
  app_thread = Thread(
107
108
  name="TestAppThread",
108
109
  target=app_thread_func,
@@ -162,3 +163,17 @@ def require_hadoop(func):
162
163
  not os.environ.get("WITH_HADOOP"), reason="Only run when hadoop is installed"
163
164
  )(func)
164
165
  return func
166
+
167
+
168
+ def get_test_unique_name(size=None):
169
+ test_name = os.getenv("PYTEST_CURRENT_TEST", "pyodps_test")
170
+ digest = hashlib.md5(to_binary(test_name)).hexdigest()
171
+ if size:
172
+ digest = digest[:size]
173
+ return digest + "_" + str(os.getpid())
174
+
175
+
176
+ def assert_mf_index_dtype(idx_obj, dtype):
177
+ from ..dataframe.core import IndexValue
178
+
179
+ assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
maxframe/typing_.py CHANGED
@@ -12,11 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import TypeVar, Union
15
+ from numbers import Integral
16
+ from typing import List, TypeVar, Union
16
17
 
17
18
  import pandas as pd
18
19
  import pyarrow as pa
19
20
 
21
+ SlicesType = List[Union[None, Integral, slice]]
22
+
20
23
  TimeoutType = Union[int, float, None]
21
24
 
22
25
 
maxframe/udf.py CHANGED
@@ -29,28 +29,25 @@ from .utils import tokenize
29
29
 
30
30
 
31
31
  class PythonPackOptions(Serializable):
32
+ _key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")
33
+
32
34
  key = StringField("key")
33
35
  requirements = ListField("requirements", FieldTypes.string, default_factory=list)
34
36
  force_rebuild = BoolField("force_rebuild", default=False)
35
37
  prefer_binary = BoolField("prefer_binary", default=False)
36
38
  pre_release = BoolField("pre_release", default=False)
37
39
  pack_instance_id = StringField("pack_instance_id", default=None)
40
+ no_audit_wheel = BoolField("no_audit_wheel", default=False)
38
41
 
39
42
  def __init__(self, key: str = None, **kw):
40
43
  super().__init__(key=key, **kw)
41
44
  if self.key is None:
42
- args = {
43
- "force_rebuild": self.force_rebuild,
44
- "prefer_binary": self.prefer_binary,
45
- "pre_release": self.pre_release,
46
- }
45
+ args = {k: getattr(self, k) for k in self._key_args}
47
46
  self.key = tokenize(set(self.requirements), args)
48
47
 
49
48
  def __repr__(self):
50
- return (
51
- f"<PythonPackOptions {self.requirements} force_rebuild={self.force_rebuild} "
52
- f"prefer_binary={self.prefer_binary} pre_release={self.pre_release}>"
53
- )
49
+ args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
50
+ return f"<PythonPackOptions {self.requirements} {args_str}>"
54
51
 
55
52
 
56
53
  class MarkedFunction(Serializable):
@@ -101,6 +98,7 @@ def with_python_requirements(
101
98
  force_rebuild: bool = False,
102
99
  prefer_binary: bool = False,
103
100
  pre_release: bool = False,
101
+ no_audit_wheel: bool = False,
104
102
  ):
105
103
  result_req = []
106
104
  for req in requirements:
@@ -112,6 +110,7 @@ def with_python_requirements(
112
110
  force_rebuild=force_rebuild,
113
111
  prefer_binary=prefer_binary,
114
112
  pre_release=pre_release,
113
+ no_audit_wheel=no_audit_wheel,
115
114
  )
116
115
  if isinstance(func, MarkedFunction):
117
116
  func.pythonpacks.append(pack_item)
maxframe/utils.py CHANGED
@@ -19,7 +19,6 @@ import dataclasses
19
19
  import datetime
20
20
  import enum
21
21
  import functools
22
- import hashlib
23
22
  import importlib
24
23
  import inspect
25
24
  import io
@@ -33,7 +32,6 @@ import sys
33
32
  import threading
34
33
  import time
35
34
  import tokenize as pytokenize
36
- import traceback
37
35
  import types
38
36
  import weakref
39
37
  import zlib
@@ -76,7 +74,7 @@ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
76
74
  tokenize_int,
77
75
  )
78
76
  from .lib.version import parse as parse_version
79
- from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
77
+ from .typing_ import TileableType, TimeoutType
80
78
 
81
79
  # make flake8 happy by referencing these imports
82
80
  NamedType = NamedType
@@ -246,58 +244,6 @@ def copy_tileables(tileables: List[TileableType], **kwargs):
246
244
  return op.new_tileables(inputs, kws=kws, output_limit=len(kws))
247
245
 
248
246
 
249
- def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
250
- from .core.operator import ShuffleProxy
251
-
252
- chunk_op = chunk.op
253
- params = chunk.params.copy()
254
- assert not isinstance(chunk_op, ShuffleProxy)
255
- # for non-shuffle nodes, we build Fetch chunks
256
- # to replace original chunk
257
- op = chunk_op.get_fetch_op_cls(chunk)(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
258
- return op.new_chunk(
259
- None,
260
- is_broadcaster=chunk.is_broadcaster,
261
- kws=[params],
262
- _key=chunk.key,
263
- **kwargs,
264
- )
265
-
266
-
267
- def build_fetch_tileable(tileable: TileableType) -> TileableType:
268
- if tileable.is_coarse():
269
- chunks = None
270
- else:
271
- chunks = []
272
- for c in tileable.chunks:
273
- fetch_chunk = build_fetch_chunk(c, index=c.index)
274
- chunks.append(fetch_chunk)
275
-
276
- tileable_op = tileable.op
277
- params = tileable.params.copy()
278
-
279
- new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
280
- return new_op.new_tileables(
281
- None,
282
- chunks=chunks,
283
- nsplits=tileable.nsplits,
284
- _key=tileable.key,
285
- _id=tileable.id,
286
- **params,
287
- )[0]
288
-
289
-
290
- def build_fetch(entity: EntityType) -> EntityType:
291
- from .core import CHUNK_TYPE, ENTITY_TYPE
292
-
293
- if isinstance(entity, CHUNK_TYPE):
294
- return build_fetch_chunk(entity)
295
- elif isinstance(entity, ENTITY_TYPE):
296
- return build_fetch_tileable(entity)
297
- else:
298
- raise TypeError(f"Type {type(entity)} not supported")
299
-
300
-
301
247
  def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
302
248
  if pd.api.types.is_extension_array_dtype(dtype):
303
249
  return dtype
@@ -387,25 +333,7 @@ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> st
387
333
 
388
334
 
389
335
  def build_session_volume_name(session_id: str) -> str:
390
- return f"mf_vol_{session_id}"
391
-
392
-
393
- def build_tileable_dir_name(tileable_key: str) -> str:
394
- m = hashlib.md5()
395
- m.update(f"mf_dir_{tileable_key}".encode())
396
- return m.hexdigest()
397
-
398
-
399
- def extract_messages_and_stacks(exc: Exception) -> Tuple[List[str], List[str]]:
400
- cur_exc = exc
401
- messages, stacks = [], []
402
- while True:
403
- messages.append(str(cur_exc))
404
- stacks.append("".join(traceback.format_tb(cur_exc.__traceback__)))
405
- if exc.__cause__ is None:
406
- break
407
- cur_exc = exc.__cause__
408
- return messages, stacks
336
+ return f"mf_vol_{session_id.replace('-', '_')}"
409
337
 
410
338
 
411
339
  async def wait_http_response(
@@ -442,11 +370,27 @@ def format_timeout_params(timeout: TimeoutType) -> str:
442
370
  return f"?wait=1&timeout={timeout}"
443
371
 
444
372
 
445
- async def to_thread_pool(func, *args, pool=None, **kwargs):
446
- loop = asyncio.events.get_running_loop()
447
- ctx = contextvars.copy_context()
448
- func_call = functools.partial(ctx.run, func, *args, **kwargs)
449
- return await loop.run_in_executor(pool, func_call)
373
+ _PrimitiveType = TypeVar("_PrimitiveType")
374
+
375
+
376
+ def create_sync_primitive(
377
+ cls: Type[_PrimitiveType], loop: asyncio.AbstractEventLoop
378
+ ) -> _PrimitiveType:
379
+ """
380
+ Create an asyncio sync primitive (locks, events, etc.)
381
+ in a certain event loop.
382
+ """
383
+ if sys.version_info[1] < 10:
384
+ return cls(loop=loop)
385
+
386
+ # From Python3.10 the loop parameter has been removed. We should work around here.
387
+ old_loop = asyncio.get_event_loop()
388
+ try:
389
+ asyncio.set_event_loop(loop)
390
+ primitive = cls()
391
+ finally:
392
+ asyncio.set_event_loop(old_loop)
393
+ return primitive
450
394
 
451
395
 
452
396
  class ToThreadCancelledError(asyncio.CancelledError):
@@ -487,15 +431,22 @@ class ToThreadMixin:
487
431
  thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
488
432
  )
489
433
 
490
- task = asyncio.create_task(
491
- to_thread_pool(func, *args, **kwargs, pool=self._pool)
492
- )
434
+ loop = asyncio.events.get_running_loop()
435
+ ctx = contextvars.copy_context()
436
+ func_call = functools.partial(ctx.run, func, *args, **kwargs)
437
+ fut = loop.run_in_executor(self._pool, func_call)
438
+
493
439
  try:
494
- return await asyncio.wait_for(asyncio.shield(task), timeout)
440
+ coro = fut
441
+ if wait_on_cancel:
442
+ coro = asyncio.shield(coro)
443
+ if timeout is not None:
444
+ coro = asyncio.wait_for(coro, timeout)
445
+ return await coro
495
446
  except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
496
447
  if not wait_on_cancel:
497
448
  raise
498
- result = await task
449
+ result = await fut
499
450
  raise ToThreadCancelledError(*ex.args, result=result)
500
451
 
501
452
  def ensure_async_call(
@@ -519,6 +470,7 @@ def config_odps_default_options():
519
470
  "metaservice.client.cache.enable": "false",
520
471
  "odps.sql.session.result.cache.enable": "false",
521
472
  "odps.sql.submit.mode": "script",
473
+ "odps.sql.job.max.time.hours": 72,
522
474
  }
523
475
 
524
476
 
@@ -883,8 +835,41 @@ def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
883
835
  raise ValueError(f"Unknown limitation value: {value}")
884
836
 
885
837
 
886
- def remove_suffix(value: str, suffix: str) -> str:
887
- return value[: -len(suffix)] if value.endswith(suffix) else value
838
+ def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
839
+ """
840
+ Remove a suffix from a given string if it exists.
841
+
842
+ Parameters
843
+ ----------
844
+ value : str
845
+ The original string.
846
+ suffix : str
847
+ The suffix to be removed.
848
+
849
+ Returns
850
+ -------
851
+ Tuple[str, bool]
852
+ A tuple containing the modified string and a boolean indicating whether the suffix was found.
853
+ """
854
+
855
+ # Check if the suffix is an empty string
856
+ if len(suffix) == 0:
857
+ # If the suffix is empty, return the original string with True
858
+ return value, True
859
+
860
+ # Check if the length of the value is less than the length of the suffix
861
+ if len(value) < len(suffix):
862
+ # If the value is shorter than the suffix, it cannot have the suffix
863
+ return value, False
864
+
865
+ # Check if the suffix matches the end of the value
866
+ match = value.endswith(suffix)
867
+
868
+ # If the suffix is found, remove it; otherwise, return the original string
869
+ if match:
870
+ return value[: -len(suffix)], match
871
+ else:
872
+ return value, match
888
873
 
889
874
 
890
875
  def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
@@ -1112,3 +1097,38 @@ def get_item_if_scalar(val: Any) -> Any:
1112
1097
  if isinstance(val, np.ndarray) and val.shape == ():
1113
1098
  return val.item()
1114
1099
  return val
1100
+
1101
+
1102
+ def collect_leaf_operators(root) -> List[Type]:
1103
+ result = []
1104
+
1105
+ def _collect(op_type):
1106
+ if len(op_type.__subclasses__()) == 0:
1107
+ result.append(op_type)
1108
+ for subclass in op_type.__subclasses__():
1109
+ _collect(subclass)
1110
+
1111
+ _collect(root)
1112
+ return result
1113
+
1114
+
1115
+ @contextmanager
1116
+ def sync_pyodps_options():
1117
+ from odps.config import OptionError
1118
+ from odps.config import option_context as pyodps_option_context
1119
+
1120
+ from .config import options
1121
+
1122
+ with pyodps_option_context() as cfg:
1123
+ cfg.local_timezone = options.local_timezone
1124
+ if options.session.enable_schema:
1125
+ try:
1126
+ cfg.enable_schema = options.session.enable_schema
1127
+ except (AttributeError, OptionError):
1128
+ # fixme enable_schema only supported in PyODPS 0.12.0 or later
1129
+ cfg.always_enable_schema = options.session.enable_schema
1130
+ yield
1131
+
1132
+
1133
+ def str_to_bool(s: Optional[str]) -> Optional[bool]:
1134
+ return s.lower().strip() in ("true", "1") if s is not None else None
@@ -1,33 +1,33 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 0.1.0b5
3
+ Version: 1.0.0
4
4
  Summary: MaxFrame operator-based data analyze framework
5
- Requires-Dist: numpy >=1.19.0
6
- Requires-Dist: pandas >=1.0.0
7
- Requires-Dist: pyodps >=0.11.6.1
8
- Requires-Dist: scipy >=1.0
9
- Requires-Dist: pyarrow >=1.0.0
10
- Requires-Dist: msgpack >=1.0.0
11
- Requires-Dist: traitlets >=5.0
12
- Requires-Dist: cloudpickle >=1.5.0
13
- Requires-Dist: pyyaml >=5.1
14
- Requires-Dist: tornado >=6.0
15
- Requires-Dist: defusedxml >=0.5.0
16
- Requires-Dist: tqdm >=4.1.0
17
- Requires-Dist: importlib-metadata >=1.4
18
- Requires-Dist: pickle5 ; python_version < "3.8"
5
+ Requires-Dist: numpy<2.0.0,>=1.19.0
6
+ Requires-Dist: pandas>=1.0.0
7
+ Requires-Dist: pyodps>=0.11.6.1
8
+ Requires-Dist: scipy>=1.0
9
+ Requires-Dist: pyarrow>=1.0.0
10
+ Requires-Dist: msgpack>=1.0.0
11
+ Requires-Dist: traitlets>=5.0
12
+ Requires-Dist: cloudpickle<3.0.0,>=1.5.0
13
+ Requires-Dist: pyyaml>=5.1
14
+ Requires-Dist: tornado>=6.0
15
+ Requires-Dist: defusedxml>=0.5.0
16
+ Requires-Dist: tqdm>=4.1.0
17
+ Requires-Dist: importlib-metadata>=1.4
18
+ Requires-Dist: pickle5; python_version < "3.8"
19
19
  Provides-Extra: dev
20
- Requires-Dist: black >=22.3.0 ; extra == 'dev'
21
- Requires-Dist: flake8 >=5.0.4 ; extra == 'dev'
22
- Requires-Dist: pre-commit >=2.15.0 ; extra == 'dev'
23
- Requires-Dist: graphviz >=0.20.1 ; extra == 'dev'
20
+ Requires-Dist: black>=22.3.0; extra == "dev"
21
+ Requires-Dist: flake8>=5.0.4; extra == "dev"
22
+ Requires-Dist: pre-commit>=2.15.0; extra == "dev"
23
+ Requires-Dist: graphviz>=0.20.1; extra == "dev"
24
24
  Provides-Extra: test
25
- Requires-Dist: mock ; extra == 'test'
26
- Requires-Dist: pytest >=7.3.1 ; extra == 'test'
27
- Requires-Dist: pytest-cov >=4.1.0 ; extra == 'test'
28
- Requires-Dist: pytest-asyncio >=0.21.0 ; extra == 'test'
29
- Requires-Dist: pytest-timeout >=2.1.0 ; extra == 'test'
30
- Requires-Dist: matplotlib >=2.0.0 ; extra == 'test'
25
+ Requires-Dist: mock; extra == "test"
26
+ Requires-Dist: pytest>=7.3.1; extra == "test"
27
+ Requires-Dist: pytest-cov>=4.1.0; extra == "test"
28
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
29
+ Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
30
+ Requires-Dist: matplotlib>=2.0.0; extra == "test"
31
31
 
32
32
  MaxCompute MaxFrame Client
33
33
  ==========================