maxframe 1.0.0rc2__cp311-cp311-macosx_10_9_universal2.whl → 1.0.0rc3__cp311-cp311-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (106) hide show
  1. maxframe/_utils.cpython-311-darwin.so +0 -0
  2. maxframe/codegen.py +3 -2
  3. maxframe/config/config.py +16 -9
  4. maxframe/config/validators.py +42 -12
  5. maxframe/conftest.py +13 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +45 -2
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/datasource/read_odps_query.py +1 -1
  24. maxframe/dataframe/datasource/read_odps_table.py +1 -1
  25. maxframe/dataframe/datastore/to_odps.py +1 -1
  26. maxframe/dataframe/operators.py +1 -17
  27. maxframe/dataframe/reduction/core.py +2 -2
  28. maxframe/io/objects/__init__.py +24 -0
  29. maxframe/io/objects/core.py +140 -0
  30. maxframe/io/objects/tensor.py +76 -0
  31. maxframe/io/objects/tests/__init__.py +13 -0
  32. maxframe/io/objects/tests/test_object_io.py +97 -0
  33. maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
  34. maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
  35. maxframe/{odpsio → io/odpsio}/schema.py +5 -5
  36. maxframe/{odpsio → io/odpsio}/tableio.py +10 -4
  37. maxframe/io/odpsio/tests/__init__.py +13 -0
  38. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -3
  39. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
  40. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  41. maxframe/io/odpsio/volumeio.py +57 -0
  42. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  43. maxframe/learn/contrib/xgboost/core.py +87 -2
  44. maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
  45. maxframe/learn/contrib/xgboost/predict.py +19 -5
  46. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  47. maxframe/learn/contrib/xgboost/train.py +25 -15
  48. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  49. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  50. maxframe/protocol.py +1 -15
  51. maxframe/remote/core.py +4 -8
  52. maxframe/serialization/__init__.py +1 -0
  53. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  54. maxframe/tensor/__init__.py +10 -2
  55. maxframe/tensor/arithmetic/isclose.py +1 -0
  56. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  57. maxframe/tensor/core.py +5 -136
  58. maxframe/tensor/datasource/array.py +3 -0
  59. maxframe/tensor/datasource/full.py +1 -1
  60. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  61. maxframe/tensor/indexing/flatnonzero.py +1 -1
  62. maxframe/tensor/merge/__init__.py +2 -0
  63. maxframe/tensor/merge/concatenate.py +98 -0
  64. maxframe/tensor/merge/tests/test_merge.py +30 -1
  65. maxframe/tensor/merge/vstack.py +70 -0
  66. maxframe/tensor/{base → misc}/__init__.py +2 -0
  67. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  68. maxframe/tensor/misc/atleast_2d.py +70 -0
  69. maxframe/tensor/misc/atleast_3d.py +85 -0
  70. maxframe/tensor/misc/tests/__init__.py +13 -0
  71. maxframe/tensor/{base → misc}/transpose.py +22 -18
  72. maxframe/tensor/operators.py +1 -7
  73. maxframe/tensor/random/core.py +1 -1
  74. maxframe/tensor/reduction/count_nonzero.py +1 -0
  75. maxframe/tensor/reduction/mean.py +1 -0
  76. maxframe/tensor/reduction/nanmean.py +1 -0
  77. maxframe/tensor/reduction/nanvar.py +2 -0
  78. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  79. maxframe/tensor/reduction/var.py +2 -0
  80. maxframe/tensor/utils.py +2 -22
  81. maxframe/typing_.py +4 -1
  82. maxframe/udf.py +8 -9
  83. maxframe/utils.py +15 -61
  84. maxframe-1.0.0rc3.dist-info/METADATA +104 -0
  85. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +101 -91
  86. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
  87. maxframe_client/fetcher.py +23 -42
  88. maxframe_client/session/graph.py +8 -2
  89. maxframe_client/session/odps.py +54 -18
  90. maxframe_client/tests/test_fetcher.py +1 -1
  91. maxframe_client/tests/test_session.py +14 -2
  92. maxframe/core/entity/chunks.py +0 -68
  93. maxframe/core/entity/fuse.py +0 -73
  94. maxframe/core/graph/builder/chunk.py +0 -430
  95. maxframe/odpsio/volumeio.py +0 -95
  96. maxframe-1.0.0rc2.dist-info/METADATA +0 -177
  97. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  98. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  99. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  100. /maxframe/tensor/{base → misc}/astype.py +0 -0
  101. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  102. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  103. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  104. /maxframe/tensor/{base → misc}/unique.py +0 -0
  105. /maxframe/tensor/{base → misc}/where.py +0 -0
  106. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -26,7 +26,8 @@ import pandas as pd
26
26
  from odps import ODPS
27
27
 
28
28
  from maxframe.config import options
29
- from maxframe.core import Entity, TileableGraph, enter_mode
29
+ from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
30
+ from maxframe.core.operator import Fetch
30
31
  from maxframe.dataframe import read_odps_table
31
32
  from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
32
33
  from maxframe.dataframe.datasource import PandasDataSourceOperator
@@ -36,11 +37,18 @@ from maxframe.errors import (
36
37
  NoTaskServerResponseError,
37
38
  SessionAlreadyClosedError,
38
39
  )
39
- from maxframe.odpsio import ODPSTableIO, pandas_to_arrow, pandas_to_odps_schema
40
+ from maxframe.io.objects import get_object_io_handler
41
+ from maxframe.io.odpsio import (
42
+ ODPSTableIO,
43
+ ODPSVolumeWriter,
44
+ pandas_to_arrow,
45
+ pandas_to_odps_schema,
46
+ )
40
47
  from maxframe.protocol import (
41
48
  DagInfo,
42
49
  DagStatus,
43
50
  ODPSTableResultInfo,
51
+ ODPSVolumeResultInfo,
44
52
  ResultInfo,
45
53
  SessionInfo,
46
54
  )
@@ -51,8 +59,13 @@ from maxframe.session import (
51
59
  Profiling,
52
60
  Progress,
53
61
  )
62
+ from maxframe.tensor.datasource import ArrayDataSource
54
63
  from maxframe.typing_ import TileableType
55
- from maxframe.utils import ToThreadMixin, build_temp_table_name
64
+ from maxframe.utils import (
65
+ ToThreadMixin,
66
+ build_session_volume_name,
67
+ build_temp_table_name,
68
+ )
56
69
 
57
70
  from ..clients.framedriver import FrameDriverClient
58
71
  from ..fetcher import get_fetcher_cls
@@ -139,14 +152,9 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
139
152
  self._session_id = session_info.session_id
140
153
  await self._show_logview_address()
141
154
 
142
- def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
143
- if (
144
- not isinstance(t.op, PandasDataSourceOperator)
145
- or t.op.get_data() is None
146
- or t.inputs
147
- ):
148
- return None
149
-
155
+ def _upload_and_get_table_read_tileable(
156
+ self, t: TileableType
157
+ ) -> Optional[TileableType]:
150
158
  schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
151
159
  if self._odps_entry.exist_table(table_meta.table_name):
152
160
  self._odps_entry.delete_table(
@@ -193,8 +201,29 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
193
201
  read_tileable.params = t.params
194
202
  return read_tileable.data
195
203
 
204
+ def _upload_and_get_vol_read_tileable(
205
+ self, t: TileableType
206
+ ) -> Optional[TileableType]:
207
+ vol_name = build_session_volume_name(self.session_id)
208
+ writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
209
+ io_handler = get_object_io_handler(t)
210
+ io_handler().write_object(writer, t, t.op.data)
211
+ return build_fetch(t).data
212
+
213
+ def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
214
+ if (
215
+ not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
216
+ or t.op.get_data() is None
217
+ or t.inputs
218
+ ):
219
+ return None
220
+ if isinstance(t.op, PandasDataSourceOperator):
221
+ return self._upload_and_get_table_read_tileable(t)
222
+ else:
223
+ return self._upload_and_get_vol_read_tileable(t)
224
+
196
225
  @enter_mode(kernel=True, build=True)
197
- def _scan_and_replace_pandas_sources(
226
+ def _scan_and_replace_local_sources(
198
227
  self, graph: TileableGraph
199
228
  ) -> Dict[TileableType, TileableType]:
200
229
  """Replaces Pandas data sources with temp table sources in the graph"""
@@ -223,14 +252,21 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
223
252
  @enter_mode(kernel=True, build=True)
224
253
  def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
225
254
  """Generate ResultInfo structs from generated temp tables"""
255
+ vol_name = build_session_volume_name(self.session_id)
256
+
226
257
  infos = dict()
227
258
  for t in tileables:
228
259
  key = t.key
229
- if not isinstance(t.op, DataFrameReadODPSTable):
230
- if not isinstance(t.inputs[0].op, DataFrameReadODPSTable):
231
- continue
232
- t = t.inputs[0]
233
- infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
260
+ if isinstance(t.op, DataFrameReadODPSTable):
261
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
262
+ else:
263
+ if isinstance(t.op, Fetch):
264
+ infos[key] = ODPSVolumeResultInfo(
265
+ volume_name=vol_name, volume_path=t.key
266
+ )
267
+ elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
268
+ t = t.inputs[0]
269
+ infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
234
270
  return infos
235
271
 
236
272
  async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
@@ -242,7 +278,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
242
278
  tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
243
279
  self, tileables, tileable_to_copied
244
280
  )
245
- source_replacements = self._scan_and_replace_pandas_sources(tileable_graph)
281
+ source_replacements = self._scan_and_replace_local_sources(tileable_graph)
246
282
 
247
283
  # we need to manage uploaded data sources with refcounting mechanism
248
284
  # as nodes in tileable_graph are copied, we need to use original nodes
@@ -22,7 +22,7 @@ from odps import ODPS
22
22
 
23
23
  import maxframe.dataframe as md
24
24
  from maxframe.config import options
25
- from maxframe.odpsio import ODPSTableIO
25
+ from maxframe.io.odpsio import ODPSTableIO
26
26
  from maxframe.protocol import ODPSTableResultInfo, ResultType
27
27
  from maxframe.tests.utils import tn
28
28
 
@@ -247,7 +247,19 @@ def test_run_and_fetch_series(start_mock_session):
247
247
  )
248
248
 
249
249
 
250
- def test_run_remote_success(start_mock_session):
250
+ def test_execute_with_tensor(oss_config, start_mock_session):
251
+ pd_df = pd.DataFrame(
252
+ {"angles": [0, 3, 4], "degrees": [360, 180, 360]},
253
+ index=["circle", "triangle", "rectangle"],
254
+ )
255
+ df = md.DataFrame(pd_df)
256
+
257
+ result = (df - [1, 2]).execute().fetch()
258
+ expected = pd_df - [1, 2]
259
+ pd.testing.assert_frame_equal(result, expected)
260
+
261
+
262
+ def test_run_remote_success(oss_config, start_mock_session):
251
263
  def func(a, b):
252
264
  return a + b
253
265
 
@@ -258,7 +270,7 @@ def test_run_remote_success(start_mock_session):
258
270
  assert result == 21
259
271
 
260
272
 
261
- def test_run_remote_error(start_mock_session):
273
+ def test_run_remote_error(oss_config, start_mock_session):
262
274
  def func():
263
275
  raise ValueError
264
276
 
@@ -1,68 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from ...serialization.serializables import BoolField, FieldTypes, TupleField
16
- from ...utils import tokenize
17
- from .core import Entity, EntityData
18
-
19
-
20
- class ChunkData(EntityData):
21
- __slots__ = ()
22
-
23
- is_broadcaster = BoolField("is_broadcaster", default=False)
24
- # If the operator is a shuffle mapper, this flag indicates whether the current chunk is mapper chunk when
25
- # the operator produce multiple chunks such as TensorUnique.
26
- is_mapper = BoolField("is_mapper", default=None)
27
- # optional fields
28
- _index = TupleField("index", FieldTypes.uint32)
29
-
30
- def __repr__(self):
31
- if self.op.stage is None:
32
- return (
33
- f"{type(self).__name__} <op={type(self.op).__name__}, "
34
- f"key={self.key}>"
35
- )
36
- else:
37
- return (
38
- f"{type(self).__name__} <op={type(self.op).__name__}, "
39
- f"stage={self.op.stage.name}, key={self.key}>"
40
- )
41
-
42
- @property
43
- def index(self):
44
- return getattr(self, "_index", None)
45
-
46
- @property
47
- def device(self):
48
- return self.op.device
49
-
50
- def _update_key(self):
51
- object.__setattr__(
52
- self,
53
- "_key",
54
- tokenize(
55
- type(self).__name__,
56
- *(getattr(self, k, None) for k in self._keys_ if k != "_index"),
57
- ),
58
- )
59
-
60
-
61
- class Chunk(Entity):
62
- _allow_data_type_ = (ChunkData,)
63
-
64
- def __repr__(self):
65
- return f"{type(self).__name__}({self._data.__repr__()})"
66
-
67
-
68
- CHUNK_TYPE = (Chunk, ChunkData)
@@ -1,73 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import numpy as np
16
-
17
- from ...serialization.serializables import ReferenceField
18
- from .chunks import CHUNK_TYPE, Chunk, ChunkData
19
-
20
-
21
- class FuseChunkData(ChunkData):
22
- __slots__ = ("_inited",)
23
-
24
- _chunk = ReferenceField(
25
- "chunk", CHUNK_TYPE, on_serialize=lambda x: x.data if hasattr(x, "data") else x
26
- )
27
-
28
- def __init__(self, *args, **kwargs):
29
- self._inited = False
30
- super().__init__(*args, **kwargs)
31
- self._extra_params = {}
32
- self._inited = True
33
-
34
- @property
35
- def chunk(self):
36
- return self._chunk
37
-
38
- @property
39
- def composed(self):
40
- # for compatibility, just return the topological ordering,
41
- # once we apply optimization on the subgraph,
42
- # `composed` is not needed any more and should be removed then.
43
- assert getattr(self._op, "fuse_graph", None) is not None
44
- fuse_graph = self._op.fuse_graph
45
- return list(fuse_graph.topological_iter())
46
-
47
- def __getattr__(self, attr):
48
- if not self._inited:
49
- return object.__getattribute__(self, attr)
50
- if attr in self._extra_params:
51
- return self._extra_params[attr]
52
- try:
53
- return getattr(self._chunk, attr)
54
- except AttributeError:
55
- return object.__getattribute__(self, attr)
56
-
57
- def __setattr__(self, attr, value):
58
- if attr == "params":
59
- self._chunk.params = value
60
- else:
61
- super().__setattr__(attr, value)
62
-
63
- @property
64
- def nbytes(self):
65
- return np.prod(self.shape) * self.dtype.itemsize
66
-
67
-
68
- class FuseChunk(Chunk):
69
- __slots__ = ()
70
- _allow_data_type_ = (FuseChunkData,)
71
-
72
-
73
- FUSE_CHUNK_TYPE = (FuseChunkData, FuseChunk)