maxframe 0.1.0b5__cp38-cp38-win32.whl → 1.0.0__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/tests/utils.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import functools
|
|
17
|
+
import hashlib
|
|
17
18
|
import os
|
|
18
19
|
import queue
|
|
19
20
|
import socket
|
|
@@ -25,7 +26,7 @@ import pytest
|
|
|
25
26
|
from tornado import netutil
|
|
26
27
|
|
|
27
28
|
from ..core import Tileable, TileableGraph
|
|
28
|
-
from ..utils import lazy_import
|
|
29
|
+
from ..utils import create_sync_primitive, lazy_import, to_binary
|
|
29
30
|
|
|
30
31
|
try:
|
|
31
32
|
from flaky import flaky
|
|
@@ -102,7 +103,7 @@ def run_app_in_thread(app_func):
|
|
|
102
103
|
def fixture_func(*args, **kwargs):
|
|
103
104
|
app_loop = asyncio.new_event_loop()
|
|
104
105
|
q = queue.Queue()
|
|
105
|
-
exit_event = asyncio.Event
|
|
106
|
+
exit_event = create_sync_primitive(asyncio.Event, app_loop)
|
|
106
107
|
app_thread = Thread(
|
|
107
108
|
name="TestAppThread",
|
|
108
109
|
target=app_thread_func,
|
|
@@ -162,3 +163,17 @@ def require_hadoop(func):
|
|
|
162
163
|
not os.environ.get("WITH_HADOOP"), reason="Only run when hadoop is installed"
|
|
163
164
|
)(func)
|
|
164
165
|
return func
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_test_unique_name(size=None):
|
|
169
|
+
test_name = os.getenv("PYTEST_CURRENT_TEST", "pyodps_test")
|
|
170
|
+
digest = hashlib.md5(to_binary(test_name)).hexdigest()
|
|
171
|
+
if size:
|
|
172
|
+
digest = digest[:size]
|
|
173
|
+
return digest + "_" + str(os.getpid())
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def assert_mf_index_dtype(idx_obj, dtype):
|
|
177
|
+
from ..dataframe.core import IndexValue
|
|
178
|
+
|
|
179
|
+
assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
|
maxframe/typing_.py
CHANGED
|
@@ -12,11 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import TypeVar, Union
|
|
15
|
+
from numbers import Integral
|
|
16
|
+
from typing import List, TypeVar, Union
|
|
16
17
|
|
|
17
18
|
import pandas as pd
|
|
18
19
|
import pyarrow as pa
|
|
19
20
|
|
|
21
|
+
SlicesType = List[Union[None, Integral, slice]]
|
|
22
|
+
|
|
20
23
|
TimeoutType = Union[int, float, None]
|
|
21
24
|
|
|
22
25
|
|
maxframe/udf.py
CHANGED
|
@@ -29,28 +29,25 @@ from .utils import tokenize
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class PythonPackOptions(Serializable):
|
|
32
|
+
_key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")
|
|
33
|
+
|
|
32
34
|
key = StringField("key")
|
|
33
35
|
requirements = ListField("requirements", FieldTypes.string, default_factory=list)
|
|
34
36
|
force_rebuild = BoolField("force_rebuild", default=False)
|
|
35
37
|
prefer_binary = BoolField("prefer_binary", default=False)
|
|
36
38
|
pre_release = BoolField("pre_release", default=False)
|
|
37
39
|
pack_instance_id = StringField("pack_instance_id", default=None)
|
|
40
|
+
no_audit_wheel = BoolField("no_audit_wheel", default=False)
|
|
38
41
|
|
|
39
42
|
def __init__(self, key: str = None, **kw):
|
|
40
43
|
super().__init__(key=key, **kw)
|
|
41
44
|
if self.key is None:
|
|
42
|
-
args = {
|
|
43
|
-
"force_rebuild": self.force_rebuild,
|
|
44
|
-
"prefer_binary": self.prefer_binary,
|
|
45
|
-
"pre_release": self.pre_release,
|
|
46
|
-
}
|
|
45
|
+
args = {k: getattr(self, k) for k in self._key_args}
|
|
47
46
|
self.key = tokenize(set(self.requirements), args)
|
|
48
47
|
|
|
49
48
|
def __repr__(self):
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
f"prefer_binary={self.prefer_binary} pre_release={self.pre_release}>"
|
|
53
|
-
)
|
|
49
|
+
args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
|
|
50
|
+
return f"<PythonPackOptions {self.requirements} {args_str}>"
|
|
54
51
|
|
|
55
52
|
|
|
56
53
|
class MarkedFunction(Serializable):
|
|
@@ -101,6 +98,7 @@ def with_python_requirements(
|
|
|
101
98
|
force_rebuild: bool = False,
|
|
102
99
|
prefer_binary: bool = False,
|
|
103
100
|
pre_release: bool = False,
|
|
101
|
+
no_audit_wheel: bool = False,
|
|
104
102
|
):
|
|
105
103
|
result_req = []
|
|
106
104
|
for req in requirements:
|
|
@@ -112,6 +110,7 @@ def with_python_requirements(
|
|
|
112
110
|
force_rebuild=force_rebuild,
|
|
113
111
|
prefer_binary=prefer_binary,
|
|
114
112
|
pre_release=pre_release,
|
|
113
|
+
no_audit_wheel=no_audit_wheel,
|
|
115
114
|
)
|
|
116
115
|
if isinstance(func, MarkedFunction):
|
|
117
116
|
func.pythonpacks.append(pack_item)
|
maxframe/utils.py
CHANGED
|
@@ -19,7 +19,6 @@ import dataclasses
|
|
|
19
19
|
import datetime
|
|
20
20
|
import enum
|
|
21
21
|
import functools
|
|
22
|
-
import hashlib
|
|
23
22
|
import importlib
|
|
24
23
|
import inspect
|
|
25
24
|
import io
|
|
@@ -33,7 +32,6 @@ import sys
|
|
|
33
32
|
import threading
|
|
34
33
|
import time
|
|
35
34
|
import tokenize as pytokenize
|
|
36
|
-
import traceback
|
|
37
35
|
import types
|
|
38
36
|
import weakref
|
|
39
37
|
import zlib
|
|
@@ -76,7 +74,7 @@ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
|
|
|
76
74
|
tokenize_int,
|
|
77
75
|
)
|
|
78
76
|
from .lib.version import parse as parse_version
|
|
79
|
-
from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
|
|
77
|
+
from .typing_ import TileableType, TimeoutType
|
|
80
78
|
|
|
81
79
|
# make flake8 happy by referencing these imports
|
|
82
80
|
NamedType = NamedType
|
|
@@ -246,58 +244,6 @@ def copy_tileables(tileables: List[TileableType], **kwargs):
|
|
|
246
244
|
return op.new_tileables(inputs, kws=kws, output_limit=len(kws))
|
|
247
245
|
|
|
248
246
|
|
|
249
|
-
def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
|
|
250
|
-
from .core.operator import ShuffleProxy
|
|
251
|
-
|
|
252
|
-
chunk_op = chunk.op
|
|
253
|
-
params = chunk.params.copy()
|
|
254
|
-
assert not isinstance(chunk_op, ShuffleProxy)
|
|
255
|
-
# for non-shuffle nodes, we build Fetch chunks
|
|
256
|
-
# to replace original chunk
|
|
257
|
-
op = chunk_op.get_fetch_op_cls(chunk)(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
|
|
258
|
-
return op.new_chunk(
|
|
259
|
-
None,
|
|
260
|
-
is_broadcaster=chunk.is_broadcaster,
|
|
261
|
-
kws=[params],
|
|
262
|
-
_key=chunk.key,
|
|
263
|
-
**kwargs,
|
|
264
|
-
)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
def build_fetch_tileable(tileable: TileableType) -> TileableType:
|
|
268
|
-
if tileable.is_coarse():
|
|
269
|
-
chunks = None
|
|
270
|
-
else:
|
|
271
|
-
chunks = []
|
|
272
|
-
for c in tileable.chunks:
|
|
273
|
-
fetch_chunk = build_fetch_chunk(c, index=c.index)
|
|
274
|
-
chunks.append(fetch_chunk)
|
|
275
|
-
|
|
276
|
-
tileable_op = tileable.op
|
|
277
|
-
params = tileable.params.copy()
|
|
278
|
-
|
|
279
|
-
new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
|
|
280
|
-
return new_op.new_tileables(
|
|
281
|
-
None,
|
|
282
|
-
chunks=chunks,
|
|
283
|
-
nsplits=tileable.nsplits,
|
|
284
|
-
_key=tileable.key,
|
|
285
|
-
_id=tileable.id,
|
|
286
|
-
**params,
|
|
287
|
-
)[0]
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
def build_fetch(entity: EntityType) -> EntityType:
|
|
291
|
-
from .core import CHUNK_TYPE, ENTITY_TYPE
|
|
292
|
-
|
|
293
|
-
if isinstance(entity, CHUNK_TYPE):
|
|
294
|
-
return build_fetch_chunk(entity)
|
|
295
|
-
elif isinstance(entity, ENTITY_TYPE):
|
|
296
|
-
return build_fetch_tileable(entity)
|
|
297
|
-
else:
|
|
298
|
-
raise TypeError(f"Type {type(entity)} not supported")
|
|
299
|
-
|
|
300
|
-
|
|
301
247
|
def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
|
|
302
248
|
if pd.api.types.is_extension_array_dtype(dtype):
|
|
303
249
|
return dtype
|
|
@@ -387,25 +333,7 @@ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> st
|
|
|
387
333
|
|
|
388
334
|
|
|
389
335
|
def build_session_volume_name(session_id: str) -> str:
|
|
390
|
-
return f"mf_vol_{session_id}"
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
def build_tileable_dir_name(tileable_key: str) -> str:
|
|
394
|
-
m = hashlib.md5()
|
|
395
|
-
m.update(f"mf_dir_{tileable_key}".encode())
|
|
396
|
-
return m.hexdigest()
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
def extract_messages_and_stacks(exc: Exception) -> Tuple[List[str], List[str]]:
|
|
400
|
-
cur_exc = exc
|
|
401
|
-
messages, stacks = [], []
|
|
402
|
-
while True:
|
|
403
|
-
messages.append(str(cur_exc))
|
|
404
|
-
stacks.append("".join(traceback.format_tb(cur_exc.__traceback__)))
|
|
405
|
-
if exc.__cause__ is None:
|
|
406
|
-
break
|
|
407
|
-
cur_exc = exc.__cause__
|
|
408
|
-
return messages, stacks
|
|
336
|
+
return f"mf_vol_{session_id.replace('-', '_')}"
|
|
409
337
|
|
|
410
338
|
|
|
411
339
|
async def wait_http_response(
|
|
@@ -442,11 +370,27 @@ def format_timeout_params(timeout: TimeoutType) -> str:
|
|
|
442
370
|
return f"?wait=1&timeout={timeout}"
|
|
443
371
|
|
|
444
372
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
373
|
+
_PrimitiveType = TypeVar("_PrimitiveType")
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def create_sync_primitive(
|
|
377
|
+
cls: Type[_PrimitiveType], loop: asyncio.AbstractEventLoop
|
|
378
|
+
) -> _PrimitiveType:
|
|
379
|
+
"""
|
|
380
|
+
Create an asyncio sync primitive (locks, events, etc.)
|
|
381
|
+
in a certain event loop.
|
|
382
|
+
"""
|
|
383
|
+
if sys.version_info[1] < 10:
|
|
384
|
+
return cls(loop=loop)
|
|
385
|
+
|
|
386
|
+
# From Python3.10 the loop parameter has been removed. We should work around here.
|
|
387
|
+
old_loop = asyncio.get_event_loop()
|
|
388
|
+
try:
|
|
389
|
+
asyncio.set_event_loop(loop)
|
|
390
|
+
primitive = cls()
|
|
391
|
+
finally:
|
|
392
|
+
asyncio.set_event_loop(old_loop)
|
|
393
|
+
return primitive
|
|
450
394
|
|
|
451
395
|
|
|
452
396
|
class ToThreadCancelledError(asyncio.CancelledError):
|
|
@@ -487,15 +431,22 @@ class ToThreadMixin:
|
|
|
487
431
|
thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
|
|
488
432
|
)
|
|
489
433
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
)
|
|
434
|
+
loop = asyncio.events.get_running_loop()
|
|
435
|
+
ctx = contextvars.copy_context()
|
|
436
|
+
func_call = functools.partial(ctx.run, func, *args, **kwargs)
|
|
437
|
+
fut = loop.run_in_executor(self._pool, func_call)
|
|
438
|
+
|
|
493
439
|
try:
|
|
494
|
-
|
|
440
|
+
coro = fut
|
|
441
|
+
if wait_on_cancel:
|
|
442
|
+
coro = asyncio.shield(coro)
|
|
443
|
+
if timeout is not None:
|
|
444
|
+
coro = asyncio.wait_for(coro, timeout)
|
|
445
|
+
return await coro
|
|
495
446
|
except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
|
|
496
447
|
if not wait_on_cancel:
|
|
497
448
|
raise
|
|
498
|
-
result = await
|
|
449
|
+
result = await fut
|
|
499
450
|
raise ToThreadCancelledError(*ex.args, result=result)
|
|
500
451
|
|
|
501
452
|
def ensure_async_call(
|
|
@@ -519,6 +470,7 @@ def config_odps_default_options():
|
|
|
519
470
|
"metaservice.client.cache.enable": "false",
|
|
520
471
|
"odps.sql.session.result.cache.enable": "false",
|
|
521
472
|
"odps.sql.submit.mode": "script",
|
|
473
|
+
"odps.sql.job.max.time.hours": 72,
|
|
522
474
|
}
|
|
523
475
|
|
|
524
476
|
|
|
@@ -883,8 +835,41 @@ def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
|
|
|
883
835
|
raise ValueError(f"Unknown limitation value: {value}")
|
|
884
836
|
|
|
885
837
|
|
|
886
|
-
def remove_suffix(value: str, suffix: str) -> str:
|
|
887
|
-
|
|
838
|
+
def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
|
|
839
|
+
"""
|
|
840
|
+
Remove a suffix from a given string if it exists.
|
|
841
|
+
|
|
842
|
+
Parameters
|
|
843
|
+
----------
|
|
844
|
+
value : str
|
|
845
|
+
The original string.
|
|
846
|
+
suffix : str
|
|
847
|
+
The suffix to be removed.
|
|
848
|
+
|
|
849
|
+
Returns
|
|
850
|
+
-------
|
|
851
|
+
Tuple[str, bool]
|
|
852
|
+
A tuple containing the modified string and a boolean indicating whether the suffix was found.
|
|
853
|
+
"""
|
|
854
|
+
|
|
855
|
+
# Check if the suffix is an empty string
|
|
856
|
+
if len(suffix) == 0:
|
|
857
|
+
# If the suffix is empty, return the original string with True
|
|
858
|
+
return value, True
|
|
859
|
+
|
|
860
|
+
# Check if the length of the value is less than the length of the suffix
|
|
861
|
+
if len(value) < len(suffix):
|
|
862
|
+
# If the value is shorter than the suffix, it cannot have the suffix
|
|
863
|
+
return value, False
|
|
864
|
+
|
|
865
|
+
# Check if the suffix matches the end of the value
|
|
866
|
+
match = value.endswith(suffix)
|
|
867
|
+
|
|
868
|
+
# If the suffix is found, remove it; otherwise, return the original string
|
|
869
|
+
if match:
|
|
870
|
+
return value[: -len(suffix)], match
|
|
871
|
+
else:
|
|
872
|
+
return value, match
|
|
888
873
|
|
|
889
874
|
|
|
890
875
|
def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
|
|
@@ -1112,3 +1097,38 @@ def get_item_if_scalar(val: Any) -> Any:
|
|
|
1112
1097
|
if isinstance(val, np.ndarray) and val.shape == ():
|
|
1113
1098
|
return val.item()
|
|
1114
1099
|
return val
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def collect_leaf_operators(root) -> List[Type]:
|
|
1103
|
+
result = []
|
|
1104
|
+
|
|
1105
|
+
def _collect(op_type):
|
|
1106
|
+
if len(op_type.__subclasses__()) == 0:
|
|
1107
|
+
result.append(op_type)
|
|
1108
|
+
for subclass in op_type.__subclasses__():
|
|
1109
|
+
_collect(subclass)
|
|
1110
|
+
|
|
1111
|
+
_collect(root)
|
|
1112
|
+
return result
|
|
1113
|
+
|
|
1114
|
+
|
|
1115
|
+
@contextmanager
|
|
1116
|
+
def sync_pyodps_options():
|
|
1117
|
+
from odps.config import OptionError
|
|
1118
|
+
from odps.config import option_context as pyodps_option_context
|
|
1119
|
+
|
|
1120
|
+
from .config import options
|
|
1121
|
+
|
|
1122
|
+
with pyodps_option_context() as cfg:
|
|
1123
|
+
cfg.local_timezone = options.local_timezone
|
|
1124
|
+
if options.session.enable_schema:
|
|
1125
|
+
try:
|
|
1126
|
+
cfg.enable_schema = options.session.enable_schema
|
|
1127
|
+
except (AttributeError, OptionError):
|
|
1128
|
+
# fixme enable_schema only supported in PyODPS 0.12.0 or later
|
|
1129
|
+
cfg.always_enable_schema = options.session.enable_schema
|
|
1130
|
+
yield
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
def str_to_bool(s: Optional[str]) -> Optional[bool]:
|
|
1134
|
+
return s.lower().strip() in ("true", "1") if s is not None else None
|
|
@@ -1,33 +1,33 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 0.1.0b5
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
|
-
Requires-Dist: numpy
|
|
6
|
-
Requires-Dist: pandas
|
|
7
|
-
Requires-Dist: pyodps
|
|
8
|
-
Requires-Dist: scipy
|
|
9
|
-
Requires-Dist: pyarrow
|
|
10
|
-
Requires-Dist: msgpack
|
|
11
|
-
Requires-Dist: traitlets
|
|
12
|
-
Requires-Dist: cloudpickle
|
|
13
|
-
Requires-Dist: pyyaml
|
|
14
|
-
Requires-Dist: tornado
|
|
15
|
-
Requires-Dist: defusedxml
|
|
16
|
-
Requires-Dist: tqdm
|
|
17
|
-
Requires-Dist: importlib-metadata
|
|
18
|
-
Requires-Dist: pickle5
|
|
5
|
+
Requires-Dist: numpy<2.0.0,>=1.19.0
|
|
6
|
+
Requires-Dist: pandas>=1.0.0
|
|
7
|
+
Requires-Dist: pyodps>=0.11.6.1
|
|
8
|
+
Requires-Dist: scipy>=1.0
|
|
9
|
+
Requires-Dist: pyarrow>=1.0.0
|
|
10
|
+
Requires-Dist: msgpack>=1.0.0
|
|
11
|
+
Requires-Dist: traitlets>=5.0
|
|
12
|
+
Requires-Dist: cloudpickle<3.0.0,>=1.5.0
|
|
13
|
+
Requires-Dist: pyyaml>=5.1
|
|
14
|
+
Requires-Dist: tornado>=6.0
|
|
15
|
+
Requires-Dist: defusedxml>=0.5.0
|
|
16
|
+
Requires-Dist: tqdm>=4.1.0
|
|
17
|
+
Requires-Dist: importlib-metadata>=1.4
|
|
18
|
+
Requires-Dist: pickle5; python_version < "3.8"
|
|
19
19
|
Provides-Extra: dev
|
|
20
|
-
Requires-Dist: black
|
|
21
|
-
Requires-Dist: flake8
|
|
22
|
-
Requires-Dist: pre-commit
|
|
23
|
-
Requires-Dist: graphviz
|
|
20
|
+
Requires-Dist: black>=22.3.0; extra == "dev"
|
|
21
|
+
Requires-Dist: flake8>=5.0.4; extra == "dev"
|
|
22
|
+
Requires-Dist: pre-commit>=2.15.0; extra == "dev"
|
|
23
|
+
Requires-Dist: graphviz>=0.20.1; extra == "dev"
|
|
24
24
|
Provides-Extra: test
|
|
25
|
-
Requires-Dist: mock
|
|
26
|
-
Requires-Dist: pytest
|
|
27
|
-
Requires-Dist: pytest-cov
|
|
28
|
-
Requires-Dist: pytest-asyncio
|
|
29
|
-
Requires-Dist: pytest-timeout
|
|
30
|
-
Requires-Dist: matplotlib
|
|
25
|
+
Requires-Dist: mock; extra == "test"
|
|
26
|
+
Requires-Dist: pytest>=7.3.1; extra == "test"
|
|
27
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
|
|
30
|
+
Requires-Dist: matplotlib>=2.0.0; extra == "test"
|
|
31
31
|
|
|
32
32
|
MaxCompute MaxFrame Client
|
|
33
33
|
==========================
|