maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0rc1__cp39-cp39-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp39-win32.pyd +0 -0
- maxframe/codegen.py +56 -3
- maxframe/config/config.py +15 -1
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cp39-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/core.py +30 -8
- maxframe/dataframe/datasource/read_odps_query.py +3 -1
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +3 -1
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +84 -0
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +2 -1
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/utils.py +7 -0
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp39-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/odpsio/arrow.py +2 -3
- maxframe/odpsio/tableio.py +22 -0
- maxframe/odpsio/tests/test_schema.py +16 -11
- maxframe/opcodes.py +3 -0
- maxframe/protocol.py +108 -10
- maxframe/serialization/core.cp39-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +119 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/session.py +28 -0
- maxframe/tensor/__init__.py +1 -1
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/__init__.py +2 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/datasource/array.py +4 -2
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +2 -2
- maxframe/udf.py +63 -3
- maxframe/utils.py +22 -13
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/METADATA +3 -3
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/RECORD +80 -61
- maxframe_client/__init__.py +0 -1
- maxframe_client/fetcher.py +65 -3
- maxframe_client/session/odps.py +74 -5
- maxframe_client/session/task.py +65 -71
- maxframe_client/tests/test_session.py +64 -1
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from ... import opcodes as OperandDef
|
|
19
|
+
from ...serialization.serializables import BoolField, Int32Field
|
|
20
|
+
from ..core import TensorOrder
|
|
21
|
+
from ..operators import TensorHasInput, TensorOperatorMixin
|
|
22
|
+
from ..utils import validate_axis
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TensorUnique(TensorHasInput, TensorOperatorMixin):
    """
    Operator describing ``unique`` over a single input tensor.

    The boolean fields mirror the keyword arguments of :func:`unique`.
    ``axis`` is resolved to a concrete axis (0 for flattened input) when the
    operator is called.
    """

    _op_type_ = OperandDef.UNIQUE

    # also emit the indices of the input that give the unique values
    return_index = BoolField("return_index", default=False)
    # also emit the indices of the unique tensor that rebuild the input
    return_inverse = BoolField("return_inverse", default=False)
    # also emit the number of occurrences of every unique value
    return_counts = BoolField("return_counts", default=False)
    # axis to operate on; None means "flatten the input first"
    axis = Int32Field("axis", default=None)

    @property
    def output_limit(self):
        # NOTE(review): a single output is reported even when return_index /
        # return_inverse / return_counts request extra ones, although
        # _gen_kws emits one kwargs dict per requested output -- confirm this
        # is what new_tensors expects.
        return 1

    def _gen_kws(self, input_obj, chunk=False, chunk_index=None):
        """
        Build the creation kwargs for every output of this operator.

        Parameters
        ----------
        input_obj
            Input tensor (or chunk); must expose ``shape``, ``dtype`` and
            ``op.gpu``.
        chunk : bool
            When True, an ``index`` entry is added to every kwargs dict.
        chunk_index : int, optional
            Position along the unique axis stored in ``index``; a falsy value
            is treated as 0.

        Returns
        -------
        list of dict
            Kwargs for the unique tensor first, followed by those of the
            requested ``indices``, ``inverse`` and ``counts`` outputs.
        """
        gpu = input_obj.op.gpu
        position = chunk_index or 0

        # unique tensor: input shape with an unknown length along the axis
        shape = list(input_obj.shape)
        shape[self.axis] = np.nan
        kw = {"shape": tuple(shape), "dtype": input_obj.dtype, "gpu": gpu}
        if chunk:
            idx = [0] * len(shape)
            idx[self.axis] = position
            kw["index"] = tuple(idx)
        kws = [kw]

        # optional 1-D outputs, in the order they are handed back to callers
        optional_outputs = [
            (self.return_index, "indices", np.nan, np.dtype(np.intp)),
            (
                self.return_inverse,
                "inverse",
                input_obj.shape[self.axis],
                np.dtype(np.intp),
            ),
            (self.return_counts, "counts", np.nan, np.dtype(int)),
        ]
        for requested, kind, length, dtype in optional_outputs:
            if not requested:
                continue
            kw = {"shape": (length,), "dtype": dtype, "gpu": gpu, "type": kind}
            if chunk:
                kw["index"] = (position,)
            kws.append(kw)

        return kws

    def __call__(self, ar):
        """
        Apply the operator to ``ar`` and return the resulting tensor(s).

        The input is promoted with ``atleast_1d``. Without an axis,
        inputs with more than one dimension are flattened and axis 0 is used;
        otherwise the axis is checked with ``validate_axis``. A single tensor
        is returned when only one output is produced, else the list of
        tensors.
        """
        from .atleast_1d import atleast_1d

        ar = atleast_1d(ar)
        # The resolved axis has to be stored on the ``axis`` field itself,
        # because that is the attribute _gen_kws reads.
        if self.axis is None:
            if ar.ndim > 1:
                ar = ar.flatten()
            self.axis = 0
        else:
            self.axis = validate_axis(ar.ndim, self.axis)

        # _gen_kws is an instance method: pass only the input tensor.
        kws = self._gen_kws(ar)
        tensors = self.new_tensors([ar], kws=kws, order=TensorOrder.C_ORDER)
        if len(tensors) == 1:
            return tensors[0]
        return tensors
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def unique(
    ar,
    return_index=False,
    return_inverse=False,
    return_counts=False,
    axis=None,
):
    """
    Find the unique elements of a tensor.

    The sorted unique elements are always returned. Up to three additional
    outputs can be requested:

    * the indices of the input tensor that give the unique values
    * the indices of the unique tensor that reconstruct the input tensor
    * the number of times each unique value comes up in the input tensor

    Parameters
    ----------
    ar : array_like
        Input tensor. Unless `axis` is specified, this will be flattened if
        it is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened tensor) that result in the unique
        tensor.
    return_inverse : bool, optional
        If True, also return the indices of the unique tensor (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened
        and treated as the elements of a 1-D tensor with the dimension of the
        given axis. Object tensors or structured tensors that contain objects
        are not supported if the `axis` kwarg is used. The default is None.

    Returns
    -------
    unique : Tensor
        The sorted unique values.
    unique_indices : Tensor, optional
        The indices of the first occurrences of the unique values in the
        original tensor. Only provided if `return_index` is True.
    unique_inverse : Tensor, optional
        The indices to reconstruct the original tensor from the
        unique tensor. Only provided if `return_inverse` is True.
    unique_counts : Tensor, optional
        The number of times each of the unique values comes up in the
        original tensor. Only provided if `return_counts` is True.

    Examples
    --------
    >>> import maxframe.tensor as mt

    >>> mt.unique([1, 1, 2, 2, 3, 3]).execute()
    array([1, 2, 3])
    >>> a = mt.array([[1, 1], [2, 3]])
    >>> mt.unique(a).execute()
    array([1, 2, 3])

    Return the unique rows of a 2D tensor

    >>> a = mt.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> mt.unique(a, axis=0).execute()
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original tensor that give the unique values:

    >>> a = mt.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = mt.unique(a, return_index=True)
    >>> u.execute()
    array(['a', 'b', 'c'],
           dtype='|S1')
    >>> indices.execute()
    array([0, 1, 3])
    >>> a[indices].execute()
    array(['a', 'b', 'c'],
           dtype='|S1')

    Reconstruct the input array from the unique values:

    >>> a = mt.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = mt.unique(a, return_inverse=True)
    >>> u.execute()
    array([1, 2, 3, 4, 6])
    >>> indices.execute()
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices].execute()
    array([1, 2, 6, 4, 2, 3, 2])
    """
    # Collect the operator options first, then build and apply the operator.
    options = {
        "return_index": return_index,
        "return_inverse": return_inverse,
        "return_counts": return_counts,
        "axis": axis,
    }
    unique_op = TensorUnique(**options)
    return unique_op(ar)
|
|
@@ -20,6 +20,7 @@ from ...serialization.serializables import (
|
|
|
20
20
|
AnyField,
|
|
21
21
|
FieldTypes,
|
|
22
22
|
NDArrayField,
|
|
23
|
+
StringField,
|
|
23
24
|
TupleField,
|
|
24
25
|
)
|
|
25
26
|
from ...utils import on_deserialize_shape, on_serialize_shape
|
|
@@ -37,8 +38,9 @@ class ArrayDataSource(TensorNoInput):
|
|
|
37
38
|
|
|
38
39
|
_op_type_ = opcodes.TENSOR_DATA_SOURCE
|
|
39
40
|
|
|
40
|
-
data = NDArrayField("data")
|
|
41
|
-
chunk_size = AnyField("chunk_size")
|
|
41
|
+
data = NDArrayField("data", default=None)
|
|
42
|
+
chunk_size = AnyField("chunk_size", default=None)
|
|
43
|
+
order = StringField("order", default=None)
|
|
42
44
|
|
|
43
45
|
def __init__(self, data=None, dtype=None, gpu=None, **kw):
|
|
44
46
|
if dtype is not None:
|
|
@@ -33,7 +33,7 @@ class Scalar(TensorNoInput):
|
|
|
33
33
|
def scalar(data, dtype=None, gpu=None):
|
|
34
34
|
try:
|
|
35
35
|
arr = np.array(data, dtype=dtype)
|
|
36
|
-
op = Scalar(arr, dtype=arr.dtype, gpu=gpu)
|
|
36
|
+
op = Scalar(data=arr, dtype=arr.dtype, gpu=gpu)
|
|
37
37
|
shape = ()
|
|
38
38
|
return op(shape)
|
|
39
39
|
except ValueError:
|
maxframe/tests/test_protocol.py
CHANGED
|
@@ -85,6 +85,40 @@ def test_error_info_json_serialize():
|
|
|
85
85
|
deserial_err_info.reraise()
|
|
86
86
|
|
|
87
87
|
|
|
88
|
+
class CannotPickleException(Exception):
    """Exception whose pickling always fails with ``ValueError``."""

    def __reduce__(self):
        # pickle consults __reduce__ when serializing, so dumping fails here
        raise ValueError()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class CannotUnpickleException(Exception):
    """Exception that pickles fine but fails with ``ValueError`` on load."""

    @classmethod
    def load_from_pk(cls, _):
        # invoked by pickle while loading; always rejects the payload
        raise ValueError()

    def __reduce__(self):
        # route reconstruction through the always-failing loader
        loader = type(self).load_from_pk
        loader_args = (0,)
        return loader, loader_args
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_error_info_fallback_json_serialize():
    # Capture an ErrorInfo for an exception that cannot be pickled and for one
    # that cannot be unpickled, then check the JSON round trip of both.
    captured = []
    for exc_type in (CannotPickleException, CannotUnpickleException):
        try:
            raise exc_type
        except exc_type as ex:
            captured.append(ErrorInfo.from_exception(ex))

    for original_info in captured:
        restored_info = ErrorInfo.from_json(original_info.to_json())
        assert restored_info.raw_error_source is None
        assert restored_info.raw_error_data is None

        with pytest.raises(RemoteException):
            restored_info.reraise()
|
|
120
|
+
|
|
121
|
+
|
|
88
122
|
def test_dag_info_json_serialize():
|
|
89
123
|
try:
|
|
90
124
|
raise ValueError("ERR_DATA")
|
maxframe/tests/test_utils.py
CHANGED
|
@@ -288,15 +288,6 @@ def test_estimate_pandas_size():
|
|
|
288
288
|
df2 = pd.DataFrame(np.random.rand(1000, 10))
|
|
289
289
|
assert utils.estimate_pandas_size(df2) == sys.getsizeof(df2)
|
|
290
290
|
|
|
291
|
-
df3 = pd.DataFrame(
|
|
292
|
-
{
|
|
293
|
-
"A": np.random.choice(["abcd", "def", "gh"], size=(1000,)),
|
|
294
|
-
"B": np.random.rand(1000),
|
|
295
|
-
"C": np.random.rand(1000),
|
|
296
|
-
}
|
|
297
|
-
)
|
|
298
|
-
assert utils.estimate_pandas_size(df3) != sys.getsizeof(df3)
|
|
299
|
-
|
|
300
291
|
s1 = pd.Series(np.random.rand(1000))
|
|
301
292
|
assert utils.estimate_pandas_size(s1) == sys.getsizeof(s1)
|
|
302
293
|
|
|
@@ -307,7 +298,6 @@ def test_estimate_pandas_size():
|
|
|
307
298
|
assert utils.estimate_pandas_size(s2) == sys.getsizeof(s2)
|
|
308
299
|
|
|
309
300
|
s3 = pd.Series(np.random.choice(["abcd", "def", "gh"], size=(1000,)))
|
|
310
|
-
assert utils.estimate_pandas_size(s3) != sys.getsizeof(s3)
|
|
311
301
|
assert (
|
|
312
302
|
pytest.approx(utils.estimate_pandas_size(s3) / sys.getsizeof(s3), abs=0.5) == 1
|
|
313
303
|
)
|
|
@@ -318,7 +308,6 @@ def test_estimate_pandas_size():
|
|
|
318
308
|
assert utils.estimate_pandas_size(idx1) == sys.getsizeof(idx1)
|
|
319
309
|
|
|
320
310
|
string_idx = pd.Index(np.random.choice(["a", "bb", "cc"], size=(1000,)))
|
|
321
|
-
assert utils.estimate_pandas_size(string_idx) != sys.getsizeof(string_idx)
|
|
322
311
|
assert (
|
|
323
312
|
pytest.approx(
|
|
324
313
|
utils.estimate_pandas_size(string_idx) / sys.getsizeof(string_idx), abs=0.5
|
|
@@ -338,7 +327,6 @@ def test_estimate_pandas_size():
|
|
|
338
327
|
},
|
|
339
328
|
index=idx2,
|
|
340
329
|
)
|
|
341
|
-
assert utils.estimate_pandas_size(df4) != sys.getsizeof(df4)
|
|
342
330
|
assert (
|
|
343
331
|
pytest.approx(utils.estimate_pandas_size(df4) / sys.getsizeof(df4), abs=0.5)
|
|
344
332
|
== 1
|
maxframe/tests/utils.py
CHANGED
|
@@ -25,7 +25,7 @@ import pytest
|
|
|
25
25
|
from tornado import netutil
|
|
26
26
|
|
|
27
27
|
from ..core import Tileable, TileableGraph
|
|
28
|
-
from ..utils import lazy_import
|
|
28
|
+
from ..utils import create_event, lazy_import
|
|
29
29
|
|
|
30
30
|
try:
|
|
31
31
|
from flaky import flaky
|
|
@@ -102,7 +102,7 @@ def run_app_in_thread(app_func):
|
|
|
102
102
|
def fixture_func(*args, **kwargs):
|
|
103
103
|
app_loop = asyncio.new_event_loop()
|
|
104
104
|
q = queue.Queue()
|
|
105
|
-
exit_event =
|
|
105
|
+
exit_event = create_event(app_loop)
|
|
106
106
|
app_thread = Thread(
|
|
107
107
|
name="TestAppThread",
|
|
108
108
|
target=app_thread_func,
|
maxframe/udf.py
CHANGED
|
@@ -12,21 +12,51 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import shlex
|
|
15
16
|
from typing import Callable, List, Optional, Union
|
|
16
17
|
|
|
17
18
|
from odps.models import Resource
|
|
18
19
|
|
|
19
20
|
from .serialization.serializables import (
|
|
21
|
+
BoolField,
|
|
20
22
|
FieldTypes,
|
|
21
23
|
FunctionField,
|
|
22
24
|
ListField,
|
|
23
25
|
Serializable,
|
|
26
|
+
StringField,
|
|
24
27
|
)
|
|
28
|
+
from .utils import tokenize
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PythonPackOptions(Serializable):
    """Serializable description of a set of Python requirements to pack."""

    key = StringField("key")
    requirements = ListField("requirements", FieldTypes.string, default_factory=list)
    force_rebuild = BoolField("force_rebuild", default=False)
    prefer_binary = BoolField("prefer_binary", default=False)
    pre_release = BoolField("pre_release", default=False)
    pack_instance_id = StringField("pack_instance_id", default=None)

    def __init__(self, key: str = None, **kw):
        super().__init__(key=key, **kw)
        if self.key is not None:
            return
        # No explicit key: derive one from the requirement set and the flags.
        flag_names = ("force_rebuild", "prefer_binary", "pre_release")
        flag_values = {name: getattr(self, name) for name in flag_names}
        self.key = tokenize(set(self.requirements), flag_values)

    def __repr__(self):
        flag_names = ("force_rebuild", "prefer_binary", "pre_release")
        flags_text = " ".join(f"{name}={getattr(self, name)}" for name in flag_names)
        return f"<PythonPackOptions {self.requirements} {flags_text}>"
|
|
25
54
|
|
|
26
55
|
|
|
27
56
|
class MarkedFunction(Serializable):
|
|
28
57
|
func = FunctionField("func")
|
|
29
58
|
resources = ListField("resources", FieldTypes.string, default_factory=list)
|
|
59
|
+
pythonpacks = ListField("pythonpacks", FieldTypes.reference, default_factory=list)
|
|
30
60
|
|
|
31
61
|
def __init__(self, func: Optional[Callable] = None, **kw):
|
|
32
62
|
super().__init__(func=func, **kw)
|
|
@@ -54,13 +84,39 @@ def with_resources(*resources: Union[str, Resource], use_wrapper_class: bool = T
|
|
|
54
84
|
def func_wrapper(func):
    """
    Attach the resources of the enclosing decorator to ``func``.

    Without the wrapper class, the resource names are stored on ``func``
    itself and ``func`` is returned. Otherwise an existing ``MarkedFunction``
    is extended, or a new one is created around ``func``.
    """
    str_resources = [res_to_str(r) for r in resources]
    if not use_wrapper_class:
        # A plain callable has no ``resources`` attribute the first time it is
        # decorated, so a default is needed to avoid AttributeError.
        existing = list(getattr(func, "resources", None) or [])
        func.resources = existing + str_resources
        return func

    if isinstance(func, MarkedFunction):
        func.resources = func.resources + str_resources
        return func
    return MarkedFunction(func, resources=str_resources)
|
|
95
|
+
|
|
96
|
+
return func_wrapper
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def with_python_requirements(
    *requirements: str,
    force_rebuild: bool = False,
    prefer_binary: bool = False,
    pre_release: bool = False,
):
    """
    Build a decorator that records Python requirements on a function.

    Parameters
    ----------
    *requirements : str
        Requirement strings. Each one is split with ``shlex.split``, so a
        single string may carry several whitespace-separated requirements.
    force_rebuild, prefer_binary, pre_release : bool
        Flags stored unchanged on the created ``PythonPackOptions``.

    Returns
    -------
    callable
        Decorator that appends the pack options to an existing
        ``MarkedFunction`` or wraps the function in a new one.
    """
    result_req = []
    for req in requirements:
        result_req.extend(shlex.split(req))

    def func_wrapper(func):
        pack_item = PythonPackOptions(
            # Use the split requirement list (not the raw argument tuple);
            # copy it so every decorated function owns its own list.
            requirements=list(result_req),
            force_rebuild=force_rebuild,
            prefer_binary=prefer_binary,
            pre_release=pre_release,
        )
        if isinstance(func, MarkedFunction):
            func.pythonpacks.append(pack_item)
            return func
        return MarkedFunction(func, pythonpacks=[pack_item])

    return func_wrapper
|
|
66
122
|
|
|
@@ -72,3 +128,7 @@ def get_udf_resources(
|
|
|
72
128
|
func: Callable,
|
|
73
129
|
) -> List[Union[Resource, str]]:
|
|
74
130
|
return getattr(func, "resources", None) or []
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_udf_pythonpacks(func: Callable) -> List[PythonPackOptions]:
    """Return the ``pythonpacks`` attribute of ``func``, or ``[]`` when it is missing or falsy."""
    packs = getattr(func, "pythonpacks", None)
    if not packs:
        return []
    return packs
|
maxframe/utils.py
CHANGED
|
@@ -33,7 +33,6 @@ import sys
|
|
|
33
33
|
import threading
|
|
34
34
|
import time
|
|
35
35
|
import tokenize as pytokenize
|
|
36
|
-
import traceback
|
|
37
36
|
import types
|
|
38
37
|
import weakref
|
|
39
38
|
import zlib
|
|
@@ -396,18 +395,6 @@ def build_tileable_dir_name(tileable_key: str) -> str:
|
|
|
396
395
|
return m.hexdigest()
|
|
397
396
|
|
|
398
397
|
|
|
399
|
-
def extract_messages_and_stacks(exc: Exception) -> Tuple[List[str], List[str]]:
    """
    Collect messages and formatted tracebacks along an exception's cause chain.

    Starting at ``exc``, follows ``__cause__`` links until one is ``None``.

    Returns
    -------
    (messages, stacks)
        Two parallel lists, outermost exception first: ``str()`` of each
        exception and the joined ``traceback.format_tb`` output of its
        ``__traceback__``.
    """
    messages, stacks = [], []
    cur_exc = exc
    while cur_exc is not None:
        messages.append(str(cur_exc))
        stacks.append("".join(traceback.format_tb(cur_exc.__traceback__)))
        # Advance along the chain from the *current* exception; following
        # ``exc.__cause__`` would revisit the first cause forever.
        cur_exc = cur_exc.__cause__
    return messages, stacks
|
|
409
|
-
|
|
410
|
-
|
|
411
398
|
async def wait_http_response(
|
|
412
399
|
url: str, *, request_timeout: TimeoutType = None, **kwargs
|
|
413
400
|
) -> httpclient.HTTPResponse:
|
|
@@ -449,6 +436,21 @@ async def to_thread_pool(func, *args, pool=None, **kwargs):
|
|
|
449
436
|
return await loop.run_in_executor(pool, func_call)
|
|
450
437
|
|
|
451
438
|
|
|
439
|
+
def create_event(loop: asyncio.AbstractEventLoop) -> asyncio.Event:
    """
    Create an asyncio.Event in a certain event loop.

    Before Python 3.10 the event is bound to ``loop`` at construction time.
    From Python 3.10 the ``loop`` parameter has been removed and an event
    attaches itself to the running loop on first use, so a plain
    ``asyncio.Event()`` is returned. This also makes the function callable
    from synchronous code, where no event loop is running.
    """
    if sys.version_info < (3, 10):
        return asyncio.Event(loop=loop)
    return asyncio.Event()
|
|
452
|
+
|
|
453
|
+
|
|
452
454
|
class ToThreadCancelledError(asyncio.CancelledError):
|
|
453
455
|
def __init__(self, *args, result=None):
|
|
454
456
|
super().__init__(*args)
|
|
@@ -519,6 +521,7 @@ def config_odps_default_options():
|
|
|
519
521
|
"metaservice.client.cache.enable": "false",
|
|
520
522
|
"odps.sql.session.result.cache.enable": "false",
|
|
521
523
|
"odps.sql.submit.mode": "script",
|
|
524
|
+
"odps.sql.job.max.time.hours": 72,
|
|
522
525
|
}
|
|
523
526
|
|
|
524
527
|
|
|
@@ -1106,3 +1109,9 @@ def get_python_tag():
|
|
|
1106
1109
|
# todo add implementation suffix for non-GIL tags when PEP703 is ready
|
|
1107
1110
|
version_info = sys.version_info
|
|
1108
1111
|
return f"cp{version_info[0]}{version_info[1]}"
|
|
1112
|
+
|
|
1113
|
+
|
|
1114
|
+
def get_item_if_scalar(val: Any) -> Any:
    """Unwrap a zero-dimensional ``np.ndarray`` via ``.item()``; return anything else unchanged."""
    is_zero_dim_array = isinstance(val, np.ndarray) and val.shape == ()
    return val.item() if is_zero_dim_array else val
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.0rc1
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
|
-
Requires-Dist: numpy
|
|
5
|
+
Requires-Dist: numpy <2.0.0,>=1.19.0
|
|
6
6
|
Requires-Dist: pandas >=1.0.0
|
|
7
|
-
Requires-Dist: pyodps >=0.11.
|
|
7
|
+
Requires-Dist: pyodps >=0.11.6.1
|
|
8
8
|
Requires-Dist: scipy >=1.0
|
|
9
9
|
Requires-Dist: pyarrow >=1.0.0
|
|
10
10
|
Requires-Dist: msgpack >=1.0.0
|