maxframe 1.0.0rc2-cp311-cp311-win32.whl → 1.0.0rc4-cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/codegen.py +4 -2
- maxframe/config/config.py +28 -9
- maxframe/config/validators.py +42 -12
- maxframe/conftest.py +56 -14
- maxframe/core/__init__.py +2 -13
- maxframe/core/entity/__init__.py +0 -4
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +45 -2
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/core.py +2 -0
- maxframe/dataframe/datasource/read_odps_query.py +67 -8
- maxframe/dataframe/datasource/read_odps_table.py +4 -2
- maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
- maxframe/dataframe/datastore/to_odps.py +8 -1
- maxframe/dataframe/extensions/__init__.py +3 -0
- maxframe/dataframe/extensions/flatmap.py +326 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/misc/drop_duplicates.py +18 -1
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
- maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
- maxframe/{odpsio → io/odpsio}/schema.py +10 -8
- maxframe/{odpsio → io/odpsio}/tableio.py +50 -38
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -7
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +2 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
- maxframe/learn/contrib/xgboost/predict.py +27 -44
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +27 -16
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/opcodes.py +3 -0
- maxframe/protocol.py +7 -16
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +10 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -0
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/utils.py +2 -22
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +49 -73
- maxframe-1.0.0rc4.dist-info/METADATA +104 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +129 -114
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +1 -1
- maxframe_client/fetcher.py +33 -50
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +134 -27
- maxframe_client/session/task.py +58 -20
- maxframe_client/tests/test_fetcher.py +1 -1
- maxframe_client/tests/test_session.py +27 -3
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/volumeio.py +0 -95
- maxframe-1.0.0rc2.dist-info/METADATA +0 -177
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/unique.py +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
maxframe/tensor/{base → misc}/transpose.py
CHANGED
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 1999-2024 Alibaba Group Holding Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,33 +64,39 @@ class TensorTranspose(TensorHasInput, TensorOperatorMixin):

 def transpose(a, axes=None):
     """
-
+    Returns an array with axes transposed.
+
+    For a 1-D array, this returns an unchanged view of the original array, as a
+    transposed vector is simply the same vector.
+    To convert a 1-D array into a 2-D column vector, an additional dimension
+    must be added, e.g., ``mt.atleast_2d(a).T`` achieves this, as does
+    ``a[:, mt.newaxis]``.
+    For a 2-D array, this is the standard matrix transpose.
+    For an n-D array, if axes are given, their order indicates how the
+    axes are permuted (see Examples). If axes are not provided, then
+    ``transpose(a).shape == a.shape[::-1]``.

     Parameters
     ----------
     a : array_like
-        Input
-    axes : list of ints, optional
-
-
+        Input array.
+    axes : tuple or list of ints, optional
+        If specified, it must be a tuple or list which contains a permutation
+        of [0,1,...,N-1] where N is the number of axes of `a`. The `i`'th axis
+        of the returned array will correspond to the axis numbered ``axes[i]``
+        of the input. If not specified, defaults to ``range(a.ndim)[::-1]``,
+        which reverses the order of the axes.

     Returns
     -------
-    p :
-        `a` with its axes permuted.
-        possible.
-
-    See Also
-    --------
-    moveaxis
-    argsort
+    p : ndarray
+        `a` with its axes permuted. A view is returned whenever possible.

     Notes
     -----
-    Use
+    Use ``transpose(a, argsort(axes))`` to invert the transposition of tensors
     when using the `axes` keyword argument.

-    Transposing a 1-D array returns an unchanged view of the original tensor.

     Examples
     --------
@@ -121,5 +125,5 @@ def transpose(a, axes=None):
         axes = list(range(a.ndim))[::-1]
     else:
         axes = list(axes)
-    op = TensorTranspose(axes, dtype=a.dtype
+    op = TensorTranspose(axes, dtype=a.dtype)
     return op(a)
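For reference, a short usage sketch of the behaviour documented above (assuming maxframe.tensor is imported as mt, the alias the docstring itself uses, and that ones and transpose are exposed there as in the tests elsewhere in this release):

    import maxframe.tensor as mt

    a = mt.ones((1, 2, 3))

    # default: reverse the axes, so the shape becomes a.shape[::-1]
    print(mt.transpose(a).shape)                  # (3, 2, 1)

    # explicit permutation: axis i of the result is axis axes[i] of the input
    print(mt.transpose(a, axes=(1, 0, 2)).shape)  # (2, 1, 3)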
maxframe/tensor/operators.py
CHANGED
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+
 from ..core import OutputType
 from ..core.operator import (
-    Fuse,
     HasInput,
     MapReduceOperator,
     Operator,
@@ -115,9 +115,3 @@ class TensorMapReduceOperator(MapReduceOperator):
     _output_type_ = OutputType.tensor

     dtype = DataTypeField("dtype", default=None)
-
-
-class TensorFuse(Fuse):
-    _output_type_ = OutputType.tensor
-
-    dtype = DataTypeField("dtype", default=None)
maxframe/tensor/random/core.py
CHANGED
@@ -19,9 +19,9 @@ from contextlib import contextmanager
 import numpy as np

 from ...serialization.serializables import FieldTypes, Int32Field, TupleField
-from ..base import broadcast_to
 from ..core import TENSOR_TYPE
 from ..datasource import tensor as astensor
+from ..misc import broadcast_to
 from ..operators import TensorMapReduceOperator, TensorOperator, TensorOperatorMixin
 from ..utils import broadcast_shape

maxframe/tensor/reduction/nanvar.py
CHANGED
@@ -24,6 +24,7 @@ from .core import TensorReduction, TensorReductionMixin

 class TensorNanMoment(TensorReduction, TensorReductionMixin):
     _op_type_ = opcodes.NANMOMENT
+    _func_name = "nanvar"

     moment = Int32Field("moment", default=2)
     ddof = Int32Field("ddof", default=None)
@@ -36,6 +37,7 @@ class TensorNanMoment(TensorReduction, TensorReductionMixin):

 class TensorNanVar(TensorReduction, TensorReductionMixin):
     _op_type_ = opcodes.NANVAR
+    _func_name = "nanvar"

     ddof = Int32Field("ddof", default=0)

maxframe/tensor/reduction/tests/test_reduction.py
CHANGED
@@ -17,8 +17,11 @@
 import numpy as np
 import pytest

+from maxframe.tensor.reduction.core import TensorReduction
+
+from ....utils import collect_leaf_operators
 from ...datasource import ones, tensor
-from .. import
+from .. import *  # noqa: F401


 def test_base_reduction():
@@ -179,3 +182,11 @@ def test_var_reduction():

     res1 = var(ones((10, 8, 8), chunk_size=3), axis=1)
     assert res1.shape == (10, 8)
+
+
+def test_reduction_op_func_name():
+    # make sure all the binary op has defined the func name.
+
+    results = collect_leaf_operators(TensorReduction)
+    for op_type in results:
+        assert hasattr(op_type, "_func_name")
maxframe/tensor/reduction/var.py
CHANGED
@@ -42,6 +42,7 @@ def reduce_var_square(var_square, avg_diff, count, op, axis, sum_func):

 class TensorMoment(TensorReduction, TensorReductionMixin):
     _op_type_ = opcodes.MOMENT
+    _func_name = "var"

     moment = Int32Field("moment", default=2)
     ddof = Int32Field("ddof", default=None)
@@ -54,6 +55,7 @@ class TensorMoment(TensorReduction, TensorReductionMixin):

 class TensorVar(TensorReduction, TensorReductionMixin):
     _op_type_ = opcodes.VAR
+    _func_name = "var"

     ddof = Int32Field("ddof", default=0)

maxframe/tensor/utils.py
CHANGED
@@ -19,18 +19,13 @@ import itertools
 import operator
 from collections import OrderedDict
 from collections.abc import Iterable
-from functools import
+from functools import wraps
 from math import ceil
 from numbers import Integral
 from typing import Dict, List, Union

 import numpy as np

-try:
-    import tiledb
-except (ImportError, OSError):  # pragma: no cover
-    tildb = None
-
 from ..core import ExecutableTuple
 from ..lib.mmh3 import hash_from_buffer
 from ..utils import lazy_import
@@ -508,7 +503,7 @@ def decide_unify_split(*splits):


 def check_out_param(out, t, casting):
-    from .
+    from .misc import broadcast_to

     if not hasattr(out, "shape"):
         raise TypeError("return arrays must be a tensor")
@@ -563,21 +558,6 @@ def filter_inputs(inputs):
     return [inp for inp in inputs if isinstance(inp, ENTITY_TYPE)]


-# As TileDB Ctx's creation is a bit time-consuming,
-# we just cache the Ctx
-# also remember the arguments should be hashable
-@lru_cache(10)
-def _create_tiledb_ctx(conf_tuple):
-    if conf_tuple is not None:
-        return tiledb.Ctx(dict(conf_tuple))
-    return tiledb.Ctx()
-
-
-def get_tiledb_ctx(conf):
-    key = tuple(conf.items()) if conf is not None else None
-    return _create_tiledb_ctx(key)
-
-
 # this function is only used for pandas' compatibility
 def to_numpy(pdf):
     try:
maxframe/typing_.py
CHANGED
@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from
+from numbers import Integral
+from typing import List, TypeVar, Union

 import pandas as pd
 import pyarrow as pa

+SlicesType = List[Union[None, Integral, slice]]
+
 TimeoutType = Union[int, float, None]

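The new SlicesType alias simply names a list of slice-like items. A tiny, hypothetical illustration of how such an alias can be used in annotations (not code from the package):

    from numbers import Integral
    from typing import List, Union

    # mirrors the alias added above
    SlicesType = List[Union[None, Integral, slice]]

    def describe_slices(slices: SlicesType) -> str:
        # hypothetical helper: report the type of each slice-like element
        return ", ".join(type(s).__name__ for s in slices)

    print(describe_slices([None, 3, slice(1, 5, 2)]))  # NoneType, int, slice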
maxframe/udf.py
CHANGED
@@ -29,28 +29,25 @@ from .utils import tokenize


 class PythonPackOptions(Serializable):
+    _key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")
+
     key = StringField("key")
     requirements = ListField("requirements", FieldTypes.string, default_factory=list)
     force_rebuild = BoolField("force_rebuild", default=False)
     prefer_binary = BoolField("prefer_binary", default=False)
     pre_release = BoolField("pre_release", default=False)
     pack_instance_id = StringField("pack_instance_id", default=None)
+    no_audit_wheel = BoolField("no_audit_wheel", default=False)

     def __init__(self, key: str = None, **kw):
         super().__init__(key=key, **kw)
         if self.key is None:
-            args = {
-                "force_rebuild": self.force_rebuild,
-                "prefer_binary": self.prefer_binary,
-                "pre_release": self.pre_release,
-            }
+            args = {k: getattr(self, k) for k in self._key_args}
             self.key = tokenize(set(self.requirements), args)

     def __repr__(self):
-
-
-            f"prefer_binary={self.prefer_binary} pre_release={self.pre_release}>"
-        )
+        args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
+        return f"<PythonPackOptions {self.requirements} {args_str}>"


 class MarkedFunction(Serializable):
@@ -101,6 +98,7 @@ def with_python_requirements(
     force_rebuild: bool = False,
     prefer_binary: bool = False,
     pre_release: bool = False,
+    no_audit_wheel: bool = False,
 ):
     result_req = []
     for req in requirements:
@@ -112,6 +110,7 @@ def with_python_requirements(
         force_rebuild=force_rebuild,
         prefer_binary=prefer_binary,
         pre_release=pre_release,
+        no_audit_wheel=no_audit_wheel,
     )
     if isinstance(func, MarkedFunction):
         func.pythonpacks.append(pack_item)
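The refactor above makes a single class-level _key_args tuple drive both the cache key and the repr, so a new option such as no_audit_wheel only needs one extra field. A minimal standalone sketch of that pattern (plain Python, not the Serializable-based class in the diff; the hash below merely stands in for MaxFrame's tokenize):

    class PackOptions:
        # single source of truth for the options that identify a pack request
        _key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")

        def __init__(self, requirements, **kw):
            self.requirements = list(requirements)
            for name in self._key_args:
                setattr(self, name, kw.get(name, False))
            # derive a stable key from the same attributes listed in _key_args
            args = {k: getattr(self, k) for k in self._key_args}
            self.key = hash((frozenset(self.requirements), tuple(sorted(args.items()))))

        def __repr__(self):
            args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
            return f"<PackOptions {self.requirements} {args_str}>"

    print(PackOptions(["pandas>=1.0"], no_audit_wheel=True))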
maxframe/utils.py
CHANGED
@@ -19,7 +19,6 @@ import dataclasses
 import datetime
 import enum
 import functools
-import hashlib
 import importlib
 import inspect
 import io
@@ -75,7 +74,7 @@ from ._utils import (  # noqa: F401  # pylint: disable=unused-import
     tokenize_int,
 )
 from .lib.version import parse as parse_version
-from .typing_ import
+from .typing_ import TileableType, TimeoutType

 # make flake8 happy by referencing these imports
 NamedType = NamedType
@@ -245,58 +244,6 @@ def copy_tileables(tileables: List[TileableType], **kwargs):
     return op.new_tileables(inputs, kws=kws, output_limit=len(kws))


-def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
-    from .core.operator import ShuffleProxy
-
-    chunk_op = chunk.op
-    params = chunk.params.copy()
-    assert not isinstance(chunk_op, ShuffleProxy)
-    # for non-shuffle nodes, we build Fetch chunks
-    # to replace original chunk
-    op = chunk_op.get_fetch_op_cls(chunk)(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
-    return op.new_chunk(
-        None,
-        is_broadcaster=chunk.is_broadcaster,
-        kws=[params],
-        _key=chunk.key,
-        **kwargs,
-    )
-
-
-def build_fetch_tileable(tileable: TileableType) -> TileableType:
-    if tileable.is_coarse():
-        chunks = None
-    else:
-        chunks = []
-        for c in tileable.chunks:
-            fetch_chunk = build_fetch_chunk(c, index=c.index)
-            chunks.append(fetch_chunk)
-
-    tileable_op = tileable.op
-    params = tileable.params.copy()
-
-    new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
-    return new_op.new_tileables(
-        None,
-        chunks=chunks,
-        nsplits=tileable.nsplits,
-        _key=tileable.key,
-        _id=tileable.id,
-        **params,
-    )[0]
-
-
-def build_fetch(entity: EntityType) -> EntityType:
-    from .core import CHUNK_TYPE, ENTITY_TYPE
-
-    if isinstance(entity, CHUNK_TYPE):
-        return build_fetch_chunk(entity)
-    elif isinstance(entity, ENTITY_TYPE):
-        return build_fetch_tileable(entity)
-    else:
-        raise TypeError(f"Type {type(entity)} not supported")
-
-
 def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
     if pd.api.types.is_extension_array_dtype(dtype):
         return dtype
@@ -386,13 +333,7 @@ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:


 def build_session_volume_name(session_id: str) -> str:
-    return f"mf_vol_{session_id}"
-
-
-def build_tileable_dir_name(tileable_key: str) -> str:
-    m = hashlib.md5()
-    m.update(f"mf_dir_{tileable_key}".encode())
-    return m.hexdigest()
+    return f"mf_vol_{session_id.replace('-', '_')}"


 async def wait_http_response(
@@ -429,13 +370,6 @@ def format_timeout_params(timeout: TimeoutType) -> str:
     return f"?wait=1&timeout={timeout}"


-async def to_thread_pool(func, *args, pool=None, **kwargs):
-    loop = asyncio.events.get_running_loop()
-    ctx = contextvars.copy_context()
-    func_call = functools.partial(ctx.run, func, *args, **kwargs)
-    return await loop.run_in_executor(pool, func_call)
-
-
 _PrimitiveType = TypeVar("_PrimitiveType")


@@ -497,15 +431,22 @@ class ToThreadMixin:
                 thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
             )

-
-
-        )
+        loop = asyncio.events.get_running_loop()
+        ctx = contextvars.copy_context()
+        func_call = functools.partial(ctx.run, func, *args, **kwargs)
+        fut = loop.run_in_executor(self._pool, func_call)
+
         try:
-
+            coro = fut
+            if wait_on_cancel:
+                coro = asyncio.shield(coro)
+            if timeout is not None:
+                coro = asyncio.wait_for(coro, timeout)
+            return await coro
         except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
             if not wait_on_cancel:
                 raise
-            result = await
+            result = await fut
             raise ToThreadCancelledError(*ex.args, result=result)

     def ensure_async_call(
@@ -1123,3 +1064,38 @@ def get_item_if_scalar(val: Any) -> Any:
     if isinstance(val, np.ndarray) and val.shape == ():
         return val.item()
     return val
+
+
+def collect_leaf_operators(root) -> List[Type]:
+    result = []
+
+    def _collect(op_type):
+        if len(op_type.__subclasses__()) == 0:
+            result.append(op_type)
+        for subclass in op_type.__subclasses__():
+            _collect(subclass)
+
+    _collect(root)
+    return result
+
+
+@contextmanager
+def sync_pyodps_options():
+    from odps.config import OptionError
+    from odps.config import option_context as pyodps_option_context
+
+    from .config import options
+
+    with pyodps_option_context() as cfg:
+        cfg.local_timezone = options.local_timezone
+        if options.session.enable_schema:
+            try:
+                cfg.enable_schema = options.session.enable_schema
+            except (AttributeError, OptionError):
+                # fixme enable_schema only supported in PyODPS 0.12.0 or later
+                cfg.always_enable_schema = options.session.enable_schema
+        yield
+
+
+def str_to_bool(s: Optional[str]) -> Optional[bool]:
+    return s.lower().strip() in ("true", "1") if s is not None else None
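The ToThreadMixin change above folds the old to_thread_pool helper into the mixin and wraps the executor future with optional timeout and wait-on-cancel handling. A standalone sketch of the same asyncio pattern (not MaxFrame's implementation; names are made up and the cancelled branch simply returns the result instead of raising ToThreadCancelledError):

    import asyncio
    import concurrent.futures
    import contextvars
    import functools
    import time


    async def run_in_pool(func, *args, pool=None, wait_on_cancel=False, timeout=None, **kwargs):
        loop = asyncio.get_running_loop()
        ctx = contextvars.copy_context()
        func_call = functools.partial(ctx.run, func, *args, **kwargs)
        fut = loop.run_in_executor(pool, func_call)

        coro = fut
        if wait_on_cancel:
            # shield() keeps the thread-side future alive when the outer await is cancelled
            coro = asyncio.shield(coro)
        if timeout is not None:
            coro = asyncio.wait_for(coro, timeout)
        try:
            return await coro
        except (asyncio.CancelledError, asyncio.TimeoutError):
            if not wait_on_cancel:
                raise
            # the deadline passed, but we still collect the result of the running thread
            return await fut


    async def main():
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        # completes after ~0.3s despite the 0.1s timeout, because wait_on_cancel=True
        print(await run_in_pool(time.sleep, 0.3, pool=pool, wait_on_cancel=True, timeout=0.1))


    asyncio.run(main())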
maxframe-1.0.0rc4.dist-info/METADATA
ADDED
@@ -0,0 +1,104 @@
+Metadata-Version: 2.1
+Name: maxframe
+Version: 1.0.0rc4
+Summary: MaxFrame operator-based data analyze framework
+Requires-Dist: numpy<2.0.0,>=1.19.0
+Requires-Dist: pandas>=1.0.0
+Requires-Dist: pyodps>=0.11.6.1
+Requires-Dist: scipy>=1.0
+Requires-Dist: pyarrow>=1.0.0
+Requires-Dist: msgpack>=1.0.0
+Requires-Dist: traitlets>=5.0
+Requires-Dist: cloudpickle<3.0.0,>=1.5.0
+Requires-Dist: pyyaml>=5.1
+Requires-Dist: tornado>=6.0
+Requires-Dist: defusedxml>=0.5.0
+Requires-Dist: tqdm>=4.1.0
+Requires-Dist: importlib-metadata>=1.4
+Requires-Dist: pickle5; python_version < "3.8"
+Provides-Extra: dev
+Requires-Dist: black>=22.3.0; extra == "dev"
+Requires-Dist: flake8>=5.0.4; extra == "dev"
+Requires-Dist: pre-commit>=2.15.0; extra == "dev"
+Requires-Dist: graphviz>=0.20.1; extra == "dev"
+Provides-Extra: test
+Requires-Dist: mock; extra == "test"
+Requires-Dist: pytest>=7.3.1; extra == "test"
+Requires-Dist: pytest-cov>=4.1.0; extra == "test"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
+Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
+Requires-Dist: matplotlib>=2.0.0; extra == "test"
+
+MaxCompute MaxFrame Client
+==========================
+
+MaxFrame is a computational framework created by Alibaba Cloud to
+provide a way for Python developers to parallelize their code with
+MaxCompute. It creates a runnable computation graph locally, submits it
+to MaxCompute to execute and obtains results from MaxCompute.
+
+MaxFrame client is the client of MaxFrame. Currently it provides a
+DataFrame-based SDK with compatible APIs for pandas. In future, other
+common Python libraries like numpy and scikit-learn will be added as
+well. Python 3.7 is recommended for MaxFrame client to enable all
+functionalities while supports for higher Python versions are on the
+way.
+
+Installation
+------------
+
+You may install MaxFrame client through PIP:
+
+.. code:: bash
+
+   pip install maxframe
+
+Latest beta version can be installed with ``--pre`` argument:
+
+.. code:: bash
+
+   pip install --pre maxframe
+
+You can also install MaxFrame client from source code:
+
+.. code:: bash
+
+   pip install git+https://github.com/aliyun/alibabacloud-odps-maxframe-client.git
+
+Getting started
+---------------
+
+We show a simple code example of MaxFrame client which read data from a
+MaxCompute table, performs some simple data transform and writes back
+into MaxCompute.
+
+.. code:: python
+
+   import maxframe.dataframe as md
+   import os
+   from maxframe import new_session
+   from odps import ODPS
+
+   o = ODPS(
+       os.getenv('ALIBABA_CLOUD_ACCESS_KEY_ID'),
+       os.getenv('ALIBABA_CLOUD_ACCESS_KEY_SECRET'),
+       project='your-default-project',
+       endpoint='your-end-point',
+   )
+   session = new_session(o)
+
+   df = md.read_odps_table("source_table")
+   df["A"] = "prefix_" + df["A"]
+   md.to_odps_table(df, "prefix_source_table")
+
+Documentation
+-------------
+
+Detailed documentations can be found
+`here <https://maxframe.readthedocs.io>`__.
+
+License
+-------
+
+Licensed under the `Apache License
+2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.