maxframe 1.0.0rc2__cp39-cp39-macosx_10_9_universal2.whl → 1.0.0rc3__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/codegen.py +3 -2
- maxframe/config/config.py +16 -9
- maxframe/config/validators.py +42 -12
- maxframe/conftest.py +13 -2
- maxframe/core/__init__.py +2 -13
- maxframe/core/entity/__init__.py +0 -4
- maxframe/core/entity/objects.py +45 -2
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/datasource/read_odps_query.py +1 -1
- maxframe/dataframe/datasource/read_odps_table.py +1 -1
- maxframe/dataframe/datastore/to_odps.py +1 -1
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
- maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
- maxframe/{odpsio → io/odpsio}/schema.py +5 -5
- maxframe/{odpsio → io/odpsio}/tableio.py +10 -4
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -3
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +57 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
- maxframe/learn/contrib/xgboost/predict.py +19 -5
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +25 -15
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/protocol.py +1 -15
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/tensor/__init__.py +10 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +98 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +70 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -0
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/utils.py +2 -22
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +15 -61
- maxframe-1.0.0rc3.dist-info/METADATA +104 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +101 -91
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
- maxframe_client/fetcher.py +23 -42
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +54 -18
- maxframe_client/tests/test_fetcher.py +1 -1
- maxframe_client/tests/test_session.py +14 -2
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/volumeio.py +0 -95
- maxframe-1.0.0rc2.dist-info/METADATA +0 -177
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/unique.py +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
maxframe_client/session/odps.py
CHANGED
|
@@ -26,7 +26,8 @@ import pandas as pd
|
|
|
26
26
|
from odps import ODPS
|
|
27
27
|
|
|
28
28
|
from maxframe.config import options
|
|
29
|
-
from maxframe.core import Entity, TileableGraph, enter_mode
|
|
29
|
+
from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
|
|
30
|
+
from maxframe.core.operator import Fetch
|
|
30
31
|
from maxframe.dataframe import read_odps_table
|
|
31
32
|
from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
32
33
|
from maxframe.dataframe.datasource import PandasDataSourceOperator
|
|
@@ -36,11 +37,18 @@ from maxframe.errors import (
|
|
|
36
37
|
NoTaskServerResponseError,
|
|
37
38
|
SessionAlreadyClosedError,
|
|
38
39
|
)
|
|
39
|
-
from maxframe.
|
|
40
|
+
from maxframe.io.objects import get_object_io_handler
|
|
41
|
+
from maxframe.io.odpsio import (
|
|
42
|
+
ODPSTableIO,
|
|
43
|
+
ODPSVolumeWriter,
|
|
44
|
+
pandas_to_arrow,
|
|
45
|
+
pandas_to_odps_schema,
|
|
46
|
+
)
|
|
40
47
|
from maxframe.protocol import (
|
|
41
48
|
DagInfo,
|
|
42
49
|
DagStatus,
|
|
43
50
|
ODPSTableResultInfo,
|
|
51
|
+
ODPSVolumeResultInfo,
|
|
44
52
|
ResultInfo,
|
|
45
53
|
SessionInfo,
|
|
46
54
|
)
|
|
@@ -51,8 +59,13 @@ from maxframe.session import (
|
|
|
51
59
|
Profiling,
|
|
52
60
|
Progress,
|
|
53
61
|
)
|
|
62
|
+
from maxframe.tensor.datasource import ArrayDataSource
|
|
54
63
|
from maxframe.typing_ import TileableType
|
|
55
|
-
from maxframe.utils import
|
|
64
|
+
from maxframe.utils import (
|
|
65
|
+
ToThreadMixin,
|
|
66
|
+
build_session_volume_name,
|
|
67
|
+
build_temp_table_name,
|
|
68
|
+
)
|
|
56
69
|
|
|
57
70
|
from ..clients.framedriver import FrameDriverClient
|
|
58
71
|
from ..fetcher import get_fetcher_cls
|
|
@@ -139,14 +152,9 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
139
152
|
self._session_id = session_info.session_id
|
|
140
153
|
await self._show_logview_address()
|
|
141
154
|
|
|
142
|
-
def
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
or t.op.get_data() is None
|
|
146
|
-
or t.inputs
|
|
147
|
-
):
|
|
148
|
-
return None
|
|
149
|
-
|
|
155
|
+
def _upload_and_get_table_read_tileable(
|
|
156
|
+
self, t: TileableType
|
|
157
|
+
) -> Optional[TileableType]:
|
|
150
158
|
schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
|
|
151
159
|
if self._odps_entry.exist_table(table_meta.table_name):
|
|
152
160
|
self._odps_entry.delete_table(
|
|
@@ -193,8 +201,29 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
193
201
|
read_tileable.params = t.params
|
|
194
202
|
return read_tileable.data
|
|
195
203
|
|
|
204
|
+
def _upload_and_get_vol_read_tileable(
|
|
205
|
+
self, t: TileableType
|
|
206
|
+
) -> Optional[TileableType]:
|
|
207
|
+
vol_name = build_session_volume_name(self.session_id)
|
|
208
|
+
writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
|
|
209
|
+
io_handler = get_object_io_handler(t)
|
|
210
|
+
io_handler().write_object(writer, t, t.op.data)
|
|
211
|
+
return build_fetch(t).data
|
|
212
|
+
|
|
213
|
+
def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
|
|
214
|
+
if (
|
|
215
|
+
not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
|
|
216
|
+
or t.op.get_data() is None
|
|
217
|
+
or t.inputs
|
|
218
|
+
):
|
|
219
|
+
return None
|
|
220
|
+
if isinstance(t.op, PandasDataSourceOperator):
|
|
221
|
+
return self._upload_and_get_table_read_tileable(t)
|
|
222
|
+
else:
|
|
223
|
+
return self._upload_and_get_vol_read_tileable(t)
|
|
224
|
+
|
|
196
225
|
@enter_mode(kernel=True, build=True)
|
|
197
|
-
def
|
|
226
|
+
def _scan_and_replace_local_sources(
|
|
198
227
|
self, graph: TileableGraph
|
|
199
228
|
) -> Dict[TileableType, TileableType]:
|
|
200
229
|
"""Replaces Pandas data sources with temp table sources in the graph"""
|
|
@@ -223,14 +252,21 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
223
252
|
@enter_mode(kernel=True, build=True)
|
|
224
253
|
def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
|
|
225
254
|
"""Generate ResultInfo structs from generated temp tables"""
|
|
255
|
+
vol_name = build_session_volume_name(self.session_id)
|
|
256
|
+
|
|
226
257
|
infos = dict()
|
|
227
258
|
for t in tileables:
|
|
228
259
|
key = t.key
|
|
229
|
-
if
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
260
|
+
if isinstance(t.op, DataFrameReadODPSTable):
|
|
261
|
+
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
262
|
+
else:
|
|
263
|
+
if isinstance(t.op, Fetch):
|
|
264
|
+
infos[key] = ODPSVolumeResultInfo(
|
|
265
|
+
volume_name=vol_name, volume_path=t.key
|
|
266
|
+
)
|
|
267
|
+
elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
|
|
268
|
+
t = t.inputs[0]
|
|
269
|
+
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
234
270
|
return infos
|
|
235
271
|
|
|
236
272
|
async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
|
|
@@ -242,7 +278,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
242
278
|
tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
|
|
243
279
|
self, tileables, tileable_to_copied
|
|
244
280
|
)
|
|
245
|
-
source_replacements = self.
|
|
281
|
+
source_replacements = self._scan_and_replace_local_sources(tileable_graph)
|
|
246
282
|
|
|
247
283
|
# we need to manage uploaded data sources with refcounting mechanism
|
|
248
284
|
# as nodes in tileable_graph are copied, we need to use original nodes
|
|
@@ -22,7 +22,7 @@ from odps import ODPS
|
|
|
22
22
|
|
|
23
23
|
import maxframe.dataframe as md
|
|
24
24
|
from maxframe.config import options
|
|
25
|
-
from maxframe.odpsio import ODPSTableIO
|
|
25
|
+
from maxframe.io.odpsio import ODPSTableIO
|
|
26
26
|
from maxframe.protocol import ODPSTableResultInfo, ResultType
|
|
27
27
|
from maxframe.tests.utils import tn
|
|
28
28
|
|
|
@@ -247,7 +247,19 @@ def test_run_and_fetch_series(start_mock_session):
|
|
|
247
247
|
)
|
|
248
248
|
|
|
249
249
|
|
|
250
|
-
def
|
|
250
|
+
def test_execute_with_tensor(oss_config, start_mock_session):
|
|
251
|
+
pd_df = pd.DataFrame(
|
|
252
|
+
{"angles": [0, 3, 4], "degrees": [360, 180, 360]},
|
|
253
|
+
index=["circle", "triangle", "rectangle"],
|
|
254
|
+
)
|
|
255
|
+
df = md.DataFrame(pd_df)
|
|
256
|
+
|
|
257
|
+
result = (df - [1, 2]).execute().fetch()
|
|
258
|
+
expected = pd_df - [1, 2]
|
|
259
|
+
pd.testing.assert_frame_equal(result, expected)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def test_run_remote_success(oss_config, start_mock_session):
|
|
251
263
|
def func(a, b):
|
|
252
264
|
return a + b
|
|
253
265
|
|
|
@@ -258,7 +270,7 @@ def test_run_remote_success(start_mock_session):
|
|
|
258
270
|
assert result == 21
|
|
259
271
|
|
|
260
272
|
|
|
261
|
-
def test_run_remote_error(start_mock_session):
|
|
273
|
+
def test_run_remote_error(oss_config, start_mock_session):
|
|
262
274
|
def func():
|
|
263
275
|
raise ValueError
|
|
264
276
|
|
maxframe/core/entity/chunks.py
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from ...serialization.serializables import BoolField, FieldTypes, TupleField
|
|
16
|
-
from ...utils import tokenize
|
|
17
|
-
from .core import Entity, EntityData
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class ChunkData(EntityData):
|
|
21
|
-
__slots__ = ()
|
|
22
|
-
|
|
23
|
-
is_broadcaster = BoolField("is_broadcaster", default=False)
|
|
24
|
-
# If the operator is a shuffle mapper, this flag indicates whether the current chunk is mapper chunk when
|
|
25
|
-
# the operator produce multiple chunks such as TensorUnique.
|
|
26
|
-
is_mapper = BoolField("is_mapper", default=None)
|
|
27
|
-
# optional fields
|
|
28
|
-
_index = TupleField("index", FieldTypes.uint32)
|
|
29
|
-
|
|
30
|
-
def __repr__(self):
|
|
31
|
-
if self.op.stage is None:
|
|
32
|
-
return (
|
|
33
|
-
f"{type(self).__name__} <op={type(self.op).__name__}, "
|
|
34
|
-
f"key={self.key}>"
|
|
35
|
-
)
|
|
36
|
-
else:
|
|
37
|
-
return (
|
|
38
|
-
f"{type(self).__name__} <op={type(self.op).__name__}, "
|
|
39
|
-
f"stage={self.op.stage.name}, key={self.key}>"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
@property
|
|
43
|
-
def index(self):
|
|
44
|
-
return getattr(self, "_index", None)
|
|
45
|
-
|
|
46
|
-
@property
|
|
47
|
-
def device(self):
|
|
48
|
-
return self.op.device
|
|
49
|
-
|
|
50
|
-
def _update_key(self):
|
|
51
|
-
object.__setattr__(
|
|
52
|
-
self,
|
|
53
|
-
"_key",
|
|
54
|
-
tokenize(
|
|
55
|
-
type(self).__name__,
|
|
56
|
-
*(getattr(self, k, None) for k in self._keys_ if k != "_index"),
|
|
57
|
-
),
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
class Chunk(Entity):
|
|
62
|
-
_allow_data_type_ = (ChunkData,)
|
|
63
|
-
|
|
64
|
-
def __repr__(self):
|
|
65
|
-
return f"{type(self).__name__}({self._data.__repr__()})"
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
CHUNK_TYPE = (Chunk, ChunkData)
|
maxframe/core/entity/fuse.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import numpy as np
|
|
16
|
-
|
|
17
|
-
from ...serialization.serializables import ReferenceField
|
|
18
|
-
from .chunks import CHUNK_TYPE, Chunk, ChunkData
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class FuseChunkData(ChunkData):
|
|
22
|
-
__slots__ = ("_inited",)
|
|
23
|
-
|
|
24
|
-
_chunk = ReferenceField(
|
|
25
|
-
"chunk", CHUNK_TYPE, on_serialize=lambda x: x.data if hasattr(x, "data") else x
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
def __init__(self, *args, **kwargs):
|
|
29
|
-
self._inited = False
|
|
30
|
-
super().__init__(*args, **kwargs)
|
|
31
|
-
self._extra_params = {}
|
|
32
|
-
self._inited = True
|
|
33
|
-
|
|
34
|
-
@property
|
|
35
|
-
def chunk(self):
|
|
36
|
-
return self._chunk
|
|
37
|
-
|
|
38
|
-
@property
|
|
39
|
-
def composed(self):
|
|
40
|
-
# for compatibility, just return the topological ordering,
|
|
41
|
-
# once we apply optimization on the subgraph,
|
|
42
|
-
# `composed` is not needed any more and should be removed then.
|
|
43
|
-
assert getattr(self._op, "fuse_graph", None) is not None
|
|
44
|
-
fuse_graph = self._op.fuse_graph
|
|
45
|
-
return list(fuse_graph.topological_iter())
|
|
46
|
-
|
|
47
|
-
def __getattr__(self, attr):
|
|
48
|
-
if not self._inited:
|
|
49
|
-
return object.__getattribute__(self, attr)
|
|
50
|
-
if attr in self._extra_params:
|
|
51
|
-
return self._extra_params[attr]
|
|
52
|
-
try:
|
|
53
|
-
return getattr(self._chunk, attr)
|
|
54
|
-
except AttributeError:
|
|
55
|
-
return object.__getattribute__(self, attr)
|
|
56
|
-
|
|
57
|
-
def __setattr__(self, attr, value):
|
|
58
|
-
if attr == "params":
|
|
59
|
-
self._chunk.params = value
|
|
60
|
-
else:
|
|
61
|
-
super().__setattr__(attr, value)
|
|
62
|
-
|
|
63
|
-
@property
|
|
64
|
-
def nbytes(self):
|
|
65
|
-
return np.prod(self.shape) * self.dtype.itemsize
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class FuseChunk(Chunk):
|
|
69
|
-
__slots__ = ()
|
|
70
|
-
_allow_data_type_ = (FuseChunkData,)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
FUSE_CHUNK_TYPE = (FuseChunkData, FuseChunk)
|