maxframe 0.1.0b3__cp310-cp310-win_amd64.whl → 0.1.0b5__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp310-win_amd64.pyd +0 -0
- maxframe/codegen.py +46 -1
- maxframe/config/config.py +14 -1
- maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
- maxframe/dataframe/__init__.py +6 -0
- maxframe/dataframe/core.py +34 -10
- maxframe/dataframe/datasource/read_odps_query.py +6 -2
- maxframe/dataframe/datasource/read_odps_table.py +5 -1
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +2 -2
- maxframe/dataframe/datastore/to_odps.py +2 -2
- maxframe/dataframe/indexing/reset_index.py +1 -17
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +61 -0
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +2 -1
- maxframe/dataframe/utils.py +7 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
- maxframe/odpsio/arrow.py +10 -6
- maxframe/odpsio/schema.py +18 -5
- maxframe/odpsio/tableio.py +22 -0
- maxframe/odpsio/tests/test_schema.py +41 -11
- maxframe/opcodes.py +8 -0
- maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/session.py +32 -2
- maxframe/tensor/__init__.py +1 -1
- maxframe/tensor/base/__init__.py +2 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/datasource/array.py +4 -2
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/udf.py +63 -3
- maxframe/utils.py +11 -0
- {maxframe-0.1.0b3.dist-info → maxframe-0.1.0b5.dist-info}/METADATA +2 -2
- {maxframe-0.1.0b3.dist-info → maxframe-0.1.0b5.dist-info}/RECORD +58 -40
- maxframe_client/fetcher.py +65 -3
- maxframe_client/session/odps.py +41 -11
- maxframe_client/session/task.py +26 -53
- maxframe_client/tests/test_session.py +49 -1
- {maxframe-0.1.0b3.dist-info → maxframe-0.1.0b5.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b3.dist-info → maxframe-0.1.0b5.dist-info}/top_level.txt +0 -0
maxframe/learn/utils/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .core import convert_to_tensor_or_dataframe
|
maxframe/learn/utils/core.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ...dataframe import DataFrame, Series
|
|
18
|
+
from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
19
|
+
from ...tensor import tensor as astensor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def convert_to_tensor_or_dataframe(item):
|
|
23
|
+
if isinstance(item, (DATAFRAME_TYPE, pd.DataFrame)):
|
|
24
|
+
item = DataFrame(item)
|
|
25
|
+
elif isinstance(item, (SERIES_TYPE, pd.Series)):
|
|
26
|
+
item = Series(item)
|
|
27
|
+
else:
|
|
28
|
+
item = astensor(item)
|
|
29
|
+
return item
|
|
maxframe/lib/mmh3.cp310-win_amd64.pyd
Binary file
|
maxframe/odpsio/arrow.py
CHANGED
|
@@ -17,10 +17,9 @@ from typing import Any, Tuple, Union
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
import pyarrow as pa
|
|
19
19
|
|
|
20
|
-
import maxframe.tensor as mt
|
|
21
|
-
|
|
22
20
|
from ..core import OutputType
|
|
23
21
|
from ..protocol import DataFrameTableMeta
|
|
22
|
+
from ..tensor.core import TENSOR_TYPE
|
|
24
23
|
from ..typing_ import ArrowTableType, PandasObjectTypes
|
|
25
24
|
from .schema import build_dataframe_table_meta
|
|
26
25
|
|
|
@@ -65,20 +64,25 @@ def arrow_to_pandas(
|
|
|
65
64
|
raise ValueError(f"Does not support meta type {table_meta.type!r}")
|
|
66
65
|
|
|
67
66
|
|
|
68
|
-
def pandas_to_arrow(
|
|
69
|
-
|
|
67
|
+
def pandas_to_arrow(
|
|
68
|
+
df: Any, nthreads=1, ignore_index=False
|
|
69
|
+
) -> Tuple[ArrowTableType, DataFrameTableMeta]:
|
|
70
|
+
table_meta = build_dataframe_table_meta(df, ignore_index)
|
|
70
71
|
df = df.copy() if callable(getattr(df, "copy", None)) else df
|
|
71
72
|
if table_meta.type in (OutputType.dataframe, OutputType.series):
|
|
72
73
|
if table_meta.type == OutputType.series:
|
|
73
74
|
df = df.to_frame("_data" if df.name is None else df.name)
|
|
74
75
|
df.columns = pd.Index(table_meta.table_column_names)
|
|
75
|
-
|
|
76
|
+
if not ignore_index:
|
|
77
|
+
df = df.rename_axis(table_meta.table_index_column_names).reset_index()
|
|
78
|
+
elif ignore_index:
|
|
79
|
+
df = pd.DataFrame([], columns=[])
|
|
76
80
|
elif table_meta.type == OutputType.index:
|
|
77
81
|
names = [f"_idx_{idx}" for idx in range(len(df.names))]
|
|
78
82
|
df = df.to_frame(name=names[0] if len(names) == 1 else names)
|
|
79
83
|
elif table_meta.type == OutputType.scalar:
|
|
80
84
|
names = ["_idx_0"]
|
|
81
|
-
if isinstance(df,
|
|
85
|
+
if isinstance(df, TENSOR_TYPE):
|
|
82
86
|
df = pd.DataFrame([], columns=names).astype({names[0]: df.dtype})
|
|
83
87
|
else:
|
|
84
88
|
df = pd.DataFrame([[df]], columns=names)
|
maxframe/odpsio/schema.py
CHANGED
|
@@ -175,7 +175,9 @@ def _scalar_as_index(df_obj: Any) -> pd.Index:
|
|
|
175
175
|
|
|
176
176
|
|
|
177
177
|
def pandas_to_odps_schema(
|
|
178
|
-
df_obj: Any,
|
|
178
|
+
df_obj: Any,
|
|
179
|
+
unknown_as_string: bool = False,
|
|
180
|
+
ignore_index=False,
|
|
179
181
|
) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
|
|
180
182
|
from .. import dataframe as md
|
|
181
183
|
from .arrow import pandas_to_arrow
|
|
@@ -209,7 +211,7 @@ def pandas_to_odps_schema(
|
|
|
209
211
|
else:
|
|
210
212
|
empty_df_obj = df_obj
|
|
211
213
|
|
|
212
|
-
arrow_data, table_meta = pandas_to_arrow(empty_df_obj)
|
|
214
|
+
arrow_data, table_meta = pandas_to_arrow(empty_df_obj, ignore_index=ignore_index)
|
|
213
215
|
return (
|
|
214
216
|
arrow_schema_to_odps_schema(
|
|
215
217
|
arrow_data.schema, unknown_as_string=unknown_as_string
|
|
@@ -268,7 +270,9 @@ def build_table_column_name(
|
|
|
268
270
|
return col_name
|
|
269
271
|
|
|
270
272
|
|
|
271
|
-
def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
|
|
273
|
+
def build_dataframe_table_meta(
|
|
274
|
+
df_obj: Any, ignore_index: bool = False
|
|
275
|
+
) -> DataFrameTableMeta:
|
|
272
276
|
from .. import dataframe as md
|
|
273
277
|
|
|
274
278
|
col_to_count = defaultdict(lambda: 0)
|
|
@@ -285,6 +289,8 @@ def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
|
|
|
285
289
|
else: # pragma: no cover
|
|
286
290
|
raise TypeError(f"Cannot accept type {type(df_obj)}")
|
|
287
291
|
|
|
292
|
+
assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
|
|
293
|
+
|
|
288
294
|
if obj_type == OutputType.scalar:
|
|
289
295
|
pd_dtypes = pd.Series([])
|
|
290
296
|
column_index_names = []
|
|
@@ -340,12 +346,19 @@ def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
|
|
|
340
346
|
else:
|
|
341
347
|
index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
|
|
342
348
|
|
|
349
|
+
if ignore_index:
|
|
350
|
+
table_index_column_names = []
|
|
351
|
+
pd_index_dtypes = pd.Series([], index=[])
|
|
352
|
+
else:
|
|
353
|
+
table_index_column_names = [f"_idx_{i}" for i in range(len(index_obj.names))]
|
|
354
|
+
pd_index_dtypes = index_dtypes
|
|
355
|
+
|
|
343
356
|
return DataFrameTableMeta(
|
|
344
357
|
table_name=table_name,
|
|
345
358
|
type=obj_type,
|
|
346
359
|
table_column_names=final_sql_columns,
|
|
347
|
-
table_index_column_names=
|
|
360
|
+
table_index_column_names=table_index_column_names,
|
|
348
361
|
pd_column_dtypes=pd_dtypes,
|
|
349
362
|
pd_column_level_names=column_index_names,
|
|
350
|
-
pd_index_dtypes=
|
|
363
|
+
pd_index_dtypes=pd_index_dtypes,
|
|
351
364
|
)
|
maxframe/odpsio/tableio.py
CHANGED
|
@@ -183,6 +183,28 @@ class HaloTableIO(MCTableIO):
|
|
|
183
183
|
for pt in partitions
|
|
184
184
|
]
|
|
185
185
|
|
|
186
|
+
def get_table_record_count(
|
|
187
|
+
self, full_table_name: str, partitions: PartitionsType = None
|
|
188
|
+
):
|
|
189
|
+
from odps.apis.storage_api import SplitOptions, TableBatchScanRequest
|
|
190
|
+
|
|
191
|
+
table = self._odps.get_table(full_table_name)
|
|
192
|
+
client = StorageApiArrowClient(
|
|
193
|
+
self._odps, table, rest_endpoint=self._storage_api_endpoint
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
split_option = SplitOptions.SplitMode.SIZE
|
|
197
|
+
|
|
198
|
+
scan_kw = {
|
|
199
|
+
"required_partitions": self._convert_partitions(partitions),
|
|
200
|
+
"split_options": SplitOptions.get_default_options(split_option),
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
# todo add more options for partition column handling
|
|
204
|
+
req = TableBatchScanRequest(**scan_kw)
|
|
205
|
+
resp = client.create_read_session(req)
|
|
206
|
+
return resp.record_count
|
|
207
|
+
|
|
186
208
|
@contextmanager
|
|
187
209
|
def open_reader(
|
|
188
210
|
self,
|
maxframe/odpsio/tests/test_schema.py
CHANGED
|
@@ -30,20 +30,23 @@ from ..schema import (
|
|
|
30
30
|
)
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
def _wrap_maxframe_obj(obj, wrap=
|
|
34
|
-
if
|
|
33
|
+
def _wrap_maxframe_obj(obj, wrap="no"):
|
|
34
|
+
if wrap == "no":
|
|
35
35
|
return obj
|
|
36
36
|
if isinstance(obj, pd.DataFrame):
|
|
37
|
-
|
|
37
|
+
obj = md.DataFrame(obj)
|
|
38
38
|
elif isinstance(obj, pd.Series):
|
|
39
|
-
|
|
39
|
+
obj = md.Series(obj)
|
|
40
40
|
elif isinstance(obj, pd.Index):
|
|
41
|
-
|
|
41
|
+
obj = md.Index(obj)
|
|
42
42
|
else:
|
|
43
|
-
|
|
43
|
+
obj = mt.scalar(obj)
|
|
44
|
+
if wrap == "data":
|
|
45
|
+
return obj.data
|
|
46
|
+
return obj
|
|
44
47
|
|
|
45
48
|
|
|
46
|
-
@pytest.mark.parametrize("wrap_obj", [
|
|
49
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
47
50
|
def test_pandas_to_odps_schema_dataframe(wrap_obj):
|
|
48
51
|
data = pd.DataFrame(np.random.rand(100, 5), columns=list("ABCDE"))
|
|
49
52
|
|
|
@@ -61,6 +64,16 @@ def test_pandas_to_odps_schema_dataframe(wrap_obj):
|
|
|
61
64
|
assert meta.pd_column_level_names == [None]
|
|
62
65
|
assert meta.pd_index_level_names == [None]
|
|
63
66
|
|
|
67
|
+
test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
|
|
68
|
+
schema, meta = pandas_to_odps_schema(test_df, ignore_index=True)
|
|
69
|
+
assert [c.name for c in schema.columns] == list(test_df.dtypes.index.str.lower())
|
|
70
|
+
assert [c.type.name for c in schema.columns] == ["double"] * len(test_df.columns)
|
|
71
|
+
assert meta.type == OutputType.dataframe
|
|
72
|
+
assert meta.table_column_names == list(test_df.dtypes.index.str.lower())
|
|
73
|
+
assert meta.table_index_column_names == []
|
|
74
|
+
assert meta.pd_column_level_names == [None]
|
|
75
|
+
assert meta.pd_index_level_names == []
|
|
76
|
+
|
|
64
77
|
data.columns = pd.MultiIndex.from_tuples(
|
|
65
78
|
[("A", "A"), ("A", "B"), ("A", "C"), ("B", "A"), ("B", "B")], names=["c1", "c2"]
|
|
66
79
|
)
|
|
@@ -84,7 +97,7 @@ def test_pandas_to_odps_schema_dataframe(wrap_obj):
|
|
|
84
97
|
assert meta.pd_index_level_names == [None, None]
|
|
85
98
|
|
|
86
99
|
|
|
87
|
-
@pytest.mark.parametrize("wrap_obj", [
|
|
100
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
88
101
|
def test_pandas_to_odps_schema_series(wrap_obj):
|
|
89
102
|
data = pd.Series(np.random.rand(100))
|
|
90
103
|
|
|
@@ -99,6 +112,15 @@ def test_pandas_to_odps_schema_series(wrap_obj):
|
|
|
99
112
|
assert meta.pd_column_level_names == [None]
|
|
100
113
|
assert meta.pd_index_level_names == [None]
|
|
101
114
|
|
|
115
|
+
schema, meta = pandas_to_odps_schema(test_s, ignore_index=True)
|
|
116
|
+
assert [c.name for c in schema.columns] == ["_data"]
|
|
117
|
+
assert [c.type.name for c in schema.columns] == ["double"]
|
|
118
|
+
assert meta.type == OutputType.series
|
|
119
|
+
assert meta.table_column_names == ["_data"]
|
|
120
|
+
assert meta.table_index_column_names == []
|
|
121
|
+
assert meta.pd_column_level_names == [None]
|
|
122
|
+
assert meta.pd_index_level_names == []
|
|
123
|
+
|
|
102
124
|
data.index = pd.MultiIndex.from_arrays(
|
|
103
125
|
[np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)],
|
|
104
126
|
names=["c1", "c2"],
|
|
@@ -116,7 +138,7 @@ def test_pandas_to_odps_schema_series(wrap_obj):
|
|
|
116
138
|
assert meta.pd_index_level_names == ["c1", "c2"]
|
|
117
139
|
|
|
118
140
|
|
|
119
|
-
@pytest.mark.parametrize("wrap_obj", [
|
|
141
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
120
142
|
def test_pandas_to_odps_schema_index(wrap_obj):
|
|
121
143
|
data = pd.Index(np.random.randint(0, 100, 100))
|
|
122
144
|
|
|
@@ -130,6 +152,9 @@ def test_pandas_to_odps_schema_index(wrap_obj):
|
|
|
130
152
|
assert meta.pd_column_level_names == []
|
|
131
153
|
assert meta.pd_index_level_names == [None]
|
|
132
154
|
|
|
155
|
+
with pytest.raises(AssertionError):
|
|
156
|
+
pandas_to_odps_schema(test_idx, unknown_as_string=True, ignore_index=True)
|
|
157
|
+
|
|
133
158
|
data = pd.MultiIndex.from_arrays(
|
|
134
159
|
[np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)],
|
|
135
160
|
names=["c1", "c2"],
|
|
@@ -145,11 +170,13 @@ def test_pandas_to_odps_schema_index(wrap_obj):
|
|
|
145
170
|
assert meta.pd_index_level_names == ["c1", "c2"]
|
|
146
171
|
|
|
147
172
|
|
|
148
|
-
@pytest.mark.parametrize("wrap_obj", [
|
|
173
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
149
174
|
def test_pandas_to_odps_schema_scalar(wrap_obj):
|
|
150
175
|
data = 1234.56
|
|
151
176
|
|
|
152
177
|
test_scalar = _wrap_maxframe_obj(data, wrap=wrap_obj)
|
|
178
|
+
if wrap_obj != "no":
|
|
179
|
+
test_scalar.op.data = None
|
|
153
180
|
schema, meta = pandas_to_odps_schema(test_scalar, unknown_as_string=True)
|
|
154
181
|
assert schema.columns[0].name == "_idx_0"
|
|
155
182
|
assert schema.columns[0].type.name == "double"
|
|
@@ -159,6 +186,9 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
|
|
|
159
186
|
assert meta.pd_column_level_names == []
|
|
160
187
|
assert meta.pd_index_level_names == [None]
|
|
161
188
|
|
|
189
|
+
with pytest.raises(AssertionError):
|
|
190
|
+
pandas_to_odps_schema(test_scalar, unknown_as_string=True, ignore_index=True)
|
|
191
|
+
|
|
162
192
|
|
|
163
193
|
def test_odps_arrow_schema_conversion():
|
|
164
194
|
odps_schema = odps_types.OdpsSchema(
|
|
@@ -254,7 +284,7 @@ def test_build_column_name():
|
|
|
254
284
|
assert build_table_column_name(4, ("A", 1), records) == "a_1"
|
|
255
285
|
|
|
256
286
|
|
|
257
|
-
@pytest.mark.parametrize("wrap_obj", [
|
|
287
|
+
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
|
|
258
288
|
def test_build_table_meta(wrap_obj):
|
|
259
289
|
data = pd.DataFrame(
|
|
260
290
|
np.random.rand(100, 7),
|
maxframe/opcodes.py
CHANGED
|
@@ -386,6 +386,9 @@ DATAFRAME_EVAL = 738
|
|
|
386
386
|
DUPLICATED = 739
|
|
387
387
|
DELETE = 740
|
|
388
388
|
ALIGN = 741
|
|
389
|
+
CASE_WHEN = 742
|
|
390
|
+
PIVOT = 743
|
|
391
|
+
PIVOT_TABLE = 744
|
|
389
392
|
|
|
390
393
|
FUSE = 801
|
|
391
394
|
|
|
@@ -564,6 +567,11 @@ CHOLESKY_FUSE = 999988
|
|
|
564
567
|
# MaxFrame-dedicated functions
|
|
565
568
|
DATAFRAME_RESHUFFLE = 10001
|
|
566
569
|
|
|
570
|
+
# MaxFrame internal operators
|
|
571
|
+
DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
|
|
572
|
+
GROUPBY_AGGR_SAME_INDEX_MERGE = 100002
|
|
573
|
+
DATAFRAME_ILOC_GET_AND_RENAME_ITEM = 100003
|
|
574
|
+
|
|
567
575
|
# fetches
|
|
568
576
|
FETCH_SHUFFLE = 999998
|
|
569
577
|
FETCH = 999999
|
|
maxframe/serialization/core.cp310-win_amd64.pyd
Binary file
|
maxframe/serialization/core.pyi
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from concurrent.futures import Executor
|
|
16
|
+
from typing import Any, Callable, Dict, List, TypeVar
|
|
17
|
+
|
|
18
|
+
def buffered(func: Callable) -> Callable: ...
|
|
19
|
+
def fast_id(obj: Any) -> int: ...
|
|
20
|
+
|
|
21
|
+
LoadType = TypeVar("LoadType")
|
|
22
|
+
|
|
23
|
+
def load_type(class_name: str, parent_class: LoadType) -> LoadType: ...
|
|
24
|
+
|
|
25
|
+
class PickleContainer:
|
|
26
|
+
def __init__(self, buffers: List[bytes]): ...
|
|
27
|
+
def get(self) -> Any: ...
|
|
28
|
+
def get_buffers(self) -> List[bytes]: ...
|
|
29
|
+
|
|
30
|
+
class Serializer:
|
|
31
|
+
serializer_id: int
|
|
32
|
+
def serial(self, obj: Any, context: Dict): ...
|
|
33
|
+
def deserial(self, serialized: List, context: Dict, subs: List[Any]): ...
|
|
34
|
+
def on_deserial_error(
|
|
35
|
+
self,
|
|
36
|
+
serialized: List,
|
|
37
|
+
context: Dict,
|
|
38
|
+
subs_serialized: List,
|
|
39
|
+
error_index: int,
|
|
40
|
+
exc: BaseException,
|
|
41
|
+
): ...
|
|
42
|
+
@classmethod
|
|
43
|
+
def register(cls, obj_type): ...
|
|
44
|
+
@classmethod
|
|
45
|
+
def unregister(cls, obj_type): ...
|
|
46
|
+
|
|
47
|
+
class Placeholder:
|
|
48
|
+
id: int
|
|
49
|
+
callbacks: List[Callable]
|
|
50
|
+
def __init__(self, id_: int): ...
|
|
51
|
+
def __hash__(self): ...
|
|
52
|
+
def __eq__(self, other): ...
|
|
53
|
+
|
|
54
|
+
def serialize(obj: Any, context: Dict = None): ...
|
|
55
|
+
async def serialize_with_spawn(
|
|
56
|
+
obj: Any,
|
|
57
|
+
context: Dict = None,
|
|
58
|
+
spawn_threshold: int = 100,
|
|
59
|
+
executor: Executor = None,
|
|
60
|
+
): ...
|
|
61
|
+
def deserialize(headers: List, buffers: List, context: Dict = None): ...
|
maxframe/session.py
CHANGED
|
@@ -365,6 +365,15 @@ class AbstractAsyncSession(AbstractSession, metaclass=ABCMeta):
|
|
|
365
365
|
Stop server.
|
|
366
366
|
"""
|
|
367
367
|
|
|
368
|
+
@abstractmethod
|
|
369
|
+
async def get_logview_address(self, hours=None) -> Optional[str]:
|
|
370
|
+
"""
|
|
371
|
+
Get Logview address
|
|
372
|
+
Returns
|
|
373
|
+
-------
|
|
374
|
+
Logview address
|
|
375
|
+
"""
|
|
376
|
+
|
|
368
377
|
def close(self):
|
|
369
378
|
asyncio.run(self.destroy())
|
|
370
379
|
|
|
@@ -549,6 +558,15 @@ class AbstractSyncSession(AbstractSession, metaclass=ABCMeta):
|
|
|
549
558
|
|
|
550
559
|
return fetch(tileables, self, offsets=offsets, sizes=sizes)
|
|
551
560
|
|
|
561
|
+
@abstractmethod
|
|
562
|
+
def get_logview_address(self, hours=None) -> Optional[str]:
|
|
563
|
+
"""
|
|
564
|
+
Get logview address
|
|
565
|
+
Returns
|
|
566
|
+
-------
|
|
567
|
+
logview address
|
|
568
|
+
"""
|
|
569
|
+
|
|
552
570
|
|
|
553
571
|
def _delegate_to_isolated_session(func: Union[Callable, Coroutine]):
|
|
554
572
|
if asyncio.iscoroutinefunction(func):
|
|
@@ -728,6 +746,11 @@ class AsyncSession(AbstractAsyncSession):
|
|
|
728
746
|
await asyncio.wrap_future(asyncio.run_coroutine_threadsafe(coro, self._loop))
|
|
729
747
|
stop_isolation()
|
|
730
748
|
|
|
749
|
+
@implements(AbstractAsyncSession.get_logview_address)
|
|
750
|
+
@_delegate_to_isolated_session
|
|
751
|
+
async def get_logview_address(self, hours=None) -> Optional[str]:
|
|
752
|
+
pass # pragma: no cover
|
|
753
|
+
|
|
731
754
|
|
|
732
755
|
class ProgressBar:
|
|
733
756
|
def __init__(self, show_progress):
|
|
@@ -949,6 +972,11 @@ class SyncSession(AbstractSyncSession):
|
|
|
949
972
|
def get_cluster_versions(self) -> List[str]:
|
|
950
973
|
pass # pragma: no cover
|
|
951
974
|
|
|
975
|
+
@implements(AbstractSyncSession.get_logview_address)
|
|
976
|
+
@_delegate_to_isolated_session
|
|
977
|
+
def get_logview_address(self, hours=None) -> Optional[str]:
|
|
978
|
+
pass # pragma: no cover
|
|
979
|
+
|
|
952
980
|
def destroy(self):
|
|
953
981
|
coro = self._isolated_session.destroy()
|
|
954
982
|
asyncio.run_coroutine_threadsafe(coro, self._loop).result()
|
|
@@ -1211,7 +1239,7 @@ def new_session(
|
|
|
1211
1239
|
# load third party extensions.
|
|
1212
1240
|
ensure_isolation_created(kwargs)
|
|
1213
1241
|
|
|
1214
|
-
odps_entry = odps_entry or ODPS.from_environments()
|
|
1242
|
+
odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
|
|
1215
1243
|
if address is None:
|
|
1216
1244
|
from maxframe_client.session.consts import ODPS_SESSION_INSECURE_SCHEME
|
|
1217
1245
|
|
|
@@ -1255,7 +1283,9 @@ def get_default_or_create(**kwargs):
|
|
|
1255
1283
|
if session is None:
|
|
1256
1284
|
# no session attached, try to create one
|
|
1257
1285
|
warnings.warn(warning_msg)
|
|
1258
|
-
session = new_session(
|
|
1286
|
+
session = new_session(
|
|
1287
|
+
ODPS.from_global() or ODPS.from_environments(), **kwargs
|
|
1288
|
+
)
|
|
1259
1289
|
session.as_default()
|
|
1260
1290
|
if isinstance(session, IsolatedAsyncSession):
|
|
1261
1291
|
session = SyncSession.from_isolated_session(session)
|
maxframe/tensor/__init__.py
CHANGED
|
@@ -114,7 +114,7 @@ from .arithmetic import (
|
|
|
114
114
|
)
|
|
115
115
|
from .arithmetic import truediv as true_divide
|
|
116
116
|
from .arithmetic import trunc
|
|
117
|
-
from .base import broadcast_to, transpose, where
|
|
117
|
+
from .base import broadcast_to, transpose, unique, where
|
|
118
118
|
from .core import Tensor
|
|
119
119
|
from .datasource import (
|
|
120
120
|
arange,
|
maxframe/tensor/base/__init__.py
CHANGED
|
@@ -13,9 +13,11 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from .astype import TensorAstype
|
|
16
|
+
from .atleast_1d import atleast_1d
|
|
16
17
|
from .broadcast_to import TensorBroadcastTo, broadcast_to
|
|
17
18
|
from .ravel import ravel
|
|
18
19
|
from .transpose import transpose
|
|
20
|
+
from .unique import unique
|
|
19
21
|
from .where import TensorWhere, where
|
|
20
22
|
|
|
21
23
|
|
maxframe/tensor/base/atleast_1d.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright 1999-2021 Alibaba Group Holding Ltd.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ...core import ExecutableTuple
|
|
20
|
+
from ..datasource import tensor as astensor
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def atleast_1d(*tensors):
|
|
24
|
+
"""
|
|
25
|
+
Convert inputs to tensors with at least one dimension.
|
|
26
|
+
|
|
27
|
+
Scalar inputs are converted to 1-dimensional tensors, whilst
|
|
28
|
+
higher-dimensional inputs are preserved.
|
|
29
|
+
|
|
30
|
+
Parameters
|
|
31
|
+
----------
|
|
32
|
+
tensors1, tensors2, ... : array_like
|
|
33
|
+
One or more input tensors.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
ret : Tensor
|
|
38
|
+
An tensor, or list of tensors, each with ``a.ndim >= 1``.
|
|
39
|
+
Copies are made only if necessary.
|
|
40
|
+
|
|
41
|
+
See Also
|
|
42
|
+
--------
|
|
43
|
+
atleast_2d, atleast_3d
|
|
44
|
+
|
|
45
|
+
Examples
|
|
46
|
+
--------
|
|
47
|
+
>>> import maxframe.tensor as mt
|
|
48
|
+
|
|
49
|
+
>>> mt.atleast_1d(1.0).execute()
|
|
50
|
+
array([ 1.])
|
|
51
|
+
|
|
52
|
+
>>> x = mt.arange(9.0).reshape(3,3)
|
|
53
|
+
>>> mt.atleast_1d(x).execute()
|
|
54
|
+
array([[ 0., 1., 2.],
|
|
55
|
+
[ 3., 4., 5.],
|
|
56
|
+
[ 6., 7., 8.]])
|
|
57
|
+
>>> mt.atleast_1d(x) is x
|
|
58
|
+
True
|
|
59
|
+
|
|
60
|
+
>>> mt.atleast_1d(1, [3, 4]).execute()
|
|
61
|
+
[array([1]), array([3, 4])]
|
|
62
|
+
|
|
63
|
+
"""
|
|
64
|
+
new_tensors = []
|
|
65
|
+
for x in tensors:
|
|
66
|
+
x = astensor(x)
|
|
67
|
+
if x.ndim == 0:
|
|
68
|
+
x = x[np.newaxis]
|
|
69
|
+
|
|
70
|
+
new_tensors.append(x)
|
|
71
|
+
|
|
72
|
+
if len(new_tensors) == 1:
|
|
73
|
+
return new_tensors[0]
|
|
74
|
+
return ExecutableTuple(new_tensors)
|