maxframe 0.1.0b4__cp37-cp37m-win32.whl → 1.0.0__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (214)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/codegen.py +56 -5
  4. maxframe/config/config.py +78 -10
  5. maxframe/config/validators.py +42 -11
  6. maxframe/conftest.py +58 -14
  7. maxframe/core/__init__.py +2 -16
  8. maxframe/core/entity/__init__.py +1 -12
  9. maxframe/core/entity/executable.py +1 -1
  10. maxframe/core/entity/objects.py +46 -45
  11. maxframe/core/entity/output_types.py +0 -3
  12. maxframe/core/entity/tests/test_objects.py +43 -0
  13. maxframe/core/entity/tileables.py +5 -78
  14. maxframe/core/graph/__init__.py +2 -2
  15. maxframe/core/graph/builder/__init__.py +0 -1
  16. maxframe/core/graph/builder/base.py +5 -4
  17. maxframe/core/graph/builder/tileable.py +4 -4
  18. maxframe/core/graph/builder/utils.py +4 -8
  19. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  20. maxframe/core/graph/core.pyx +4 -4
  21. maxframe/core/graph/entity.py +9 -33
  22. maxframe/core/operator/__init__.py +2 -9
  23. maxframe/core/operator/base.py +3 -5
  24. maxframe/core/operator/objects.py +0 -9
  25. maxframe/core/operator/utils.py +55 -0
  26. maxframe/dataframe/__init__.py +2 -1
  27. maxframe/dataframe/arithmetic/around.py +5 -17
  28. maxframe/dataframe/arithmetic/core.py +15 -7
  29. maxframe/dataframe/arithmetic/docstring.py +7 -33
  30. maxframe/dataframe/arithmetic/equal.py +4 -2
  31. maxframe/dataframe/arithmetic/greater.py +4 -2
  32. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  33. maxframe/dataframe/arithmetic/less.py +2 -2
  34. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  36. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  37. maxframe/dataframe/core.py +58 -12
  38. maxframe/dataframe/datasource/date_range.py +2 -2
  39. maxframe/dataframe/datasource/read_odps_query.py +120 -24
  40. maxframe/dataframe/datasource/read_odps_table.py +9 -4
  41. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  42. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  43. maxframe/dataframe/datastore/to_odps.py +28 -0
  44. maxframe/dataframe/extensions/__init__.py +5 -0
  45. maxframe/dataframe/extensions/flatjson.py +131 -0
  46. maxframe/dataframe/extensions/flatmap.py +317 -0
  47. maxframe/dataframe/extensions/reshuffle.py +1 -1
  48. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  49. maxframe/dataframe/groupby/core.py +1 -1
  50. maxframe/dataframe/groupby/cum.py +0 -1
  51. maxframe/dataframe/groupby/fill.py +4 -1
  52. maxframe/dataframe/groupby/getitem.py +6 -0
  53. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  54. maxframe/dataframe/groupby/transform.py +5 -1
  55. maxframe/dataframe/indexing/align.py +1 -1
  56. maxframe/dataframe/indexing/loc.py +6 -4
  57. maxframe/dataframe/indexing/rename.py +5 -28
  58. maxframe/dataframe/indexing/sample.py +0 -1
  59. maxframe/dataframe/indexing/set_index.py +68 -1
  60. maxframe/dataframe/initializer.py +11 -1
  61. maxframe/dataframe/merge/__init__.py +9 -1
  62. maxframe/dataframe/merge/concat.py +41 -31
  63. maxframe/dataframe/merge/merge.py +237 -3
  64. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  65. maxframe/dataframe/misc/__init__.py +4 -0
  66. maxframe/dataframe/misc/apply.py +6 -11
  67. maxframe/dataframe/misc/case_when.py +141 -0
  68. maxframe/dataframe/misc/describe.py +2 -2
  69. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  70. maxframe/dataframe/misc/eval.py +4 -0
  71. maxframe/dataframe/misc/memory_usage.py +2 -2
  72. maxframe/dataframe/misc/pct_change.py +1 -83
  73. maxframe/dataframe/misc/pivot_table.py +262 -0
  74. maxframe/dataframe/misc/tests/test_misc.py +93 -1
  75. maxframe/dataframe/misc/transform.py +1 -30
  76. maxframe/dataframe/misc/value_counts.py +4 -17
  77. maxframe/dataframe/missing/dropna.py +1 -1
  78. maxframe/dataframe/missing/fillna.py +5 -5
  79. maxframe/dataframe/operators.py +1 -17
  80. maxframe/dataframe/plotting/core.py +2 -2
  81. maxframe/dataframe/reduction/core.py +4 -3
  82. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  83. maxframe/dataframe/sort/sort_values.py +1 -11
  84. maxframe/dataframe/statistics/corr.py +3 -3
  85. maxframe/dataframe/statistics/quantile.py +13 -19
  86. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  87. maxframe/dataframe/tests/test_initializer.py +33 -2
  88. maxframe/dataframe/utils.py +33 -11
  89. maxframe/dataframe/window/expanding.py +5 -3
  90. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  91. maxframe/errors.py +13 -0
  92. maxframe/extension.py +12 -0
  93. maxframe/io/__init__.py +13 -0
  94. maxframe/io/objects/__init__.py +24 -0
  95. maxframe/io/objects/core.py +140 -0
  96. maxframe/io/objects/tensor.py +76 -0
  97. maxframe/io/objects/tests/__init__.py +13 -0
  98. maxframe/io/objects/tests/test_object_io.py +97 -0
  99. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  100. maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
  101. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  102. maxframe/io/odpsio/tableio.py +719 -0
  103. maxframe/io/odpsio/tests/__init__.py +13 -0
  104. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
  105. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  106. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  107. maxframe/io/odpsio/volumeio.py +63 -0
  108. maxframe/learn/contrib/__init__.py +3 -1
  109. maxframe/learn/contrib/graph/__init__.py +15 -0
  110. maxframe/learn/contrib/graph/connected_components.py +215 -0
  111. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  112. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  113. maxframe/learn/contrib/llm/__init__.py +16 -0
  114. maxframe/learn/contrib/llm/core.py +54 -0
  115. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  116. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  117. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  118. maxframe/learn/contrib/llm/text.py +42 -0
  119. maxframe/learn/contrib/utils.py +52 -0
  120. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  121. maxframe/learn/contrib/xgboost/classifier.py +110 -0
  122. maxframe/learn/contrib/xgboost/core.py +241 -0
  123. maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
  124. maxframe/learn/contrib/xgboost/predict.py +121 -0
  125. maxframe/learn/contrib/xgboost/regressor.py +71 -0
  126. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  127. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  128. maxframe/learn/contrib/xgboost/train.py +132 -0
  129. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  130. maxframe/learn/utils/__init__.py +15 -0
  131. maxframe/learn/utils/core.py +29 -0
  132. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  133. maxframe/lib/mmh3.pyi +43 -0
  134. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  135. maxframe/lib/wrapped_pickle.py +2 -1
  136. maxframe/opcodes.py +11 -0
  137. maxframe/protocol.py +154 -27
  138. maxframe/remote/core.py +4 -8
  139. maxframe/serialization/__init__.py +1 -0
  140. maxframe/serialization/core.cp37-win32.pyd +0 -0
  141. maxframe/serialization/core.pxd +3 -0
  142. maxframe/serialization/core.pyi +64 -0
  143. maxframe/serialization/core.pyx +67 -26
  144. maxframe/serialization/exception.py +1 -1
  145. maxframe/serialization/pandas.py +52 -17
  146. maxframe/serialization/serializables/core.py +180 -15
  147. maxframe/serialization/serializables/field_type.py +4 -1
  148. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  149. maxframe/serialization/tests/test_serial.py +2 -1
  150. maxframe/session.py +37 -2
  151. maxframe/tensor/__init__.py +81 -2
  152. maxframe/tensor/arithmetic/isclose.py +1 -0
  153. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  154. maxframe/tensor/core.py +5 -136
  155. maxframe/tensor/datasource/array.py +7 -2
  156. maxframe/tensor/datasource/full.py +1 -1
  157. maxframe/tensor/datasource/scalar.py +1 -1
  158. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  159. maxframe/tensor/indexing/flatnonzero.py +1 -1
  160. maxframe/tensor/indexing/getitem.py +2 -0
  161. maxframe/tensor/merge/__init__.py +2 -0
  162. maxframe/tensor/merge/concatenate.py +101 -0
  163. maxframe/tensor/merge/tests/test_merge.py +30 -1
  164. maxframe/tensor/merge/vstack.py +74 -0
  165. maxframe/tensor/{base → misc}/__init__.py +4 -0
  166. maxframe/tensor/misc/atleast_1d.py +72 -0
  167. maxframe/tensor/misc/atleast_2d.py +70 -0
  168. maxframe/tensor/misc/atleast_3d.py +85 -0
  169. maxframe/tensor/misc/tests/__init__.py +13 -0
  170. maxframe/tensor/{base → misc}/transpose.py +22 -18
  171. maxframe/tensor/misc/unique.py +205 -0
  172. maxframe/tensor/operators.py +1 -7
  173. maxframe/tensor/random/core.py +1 -1
  174. maxframe/tensor/reduction/count_nonzero.py +2 -1
  175. maxframe/tensor/reduction/mean.py +1 -0
  176. maxframe/tensor/reduction/nanmean.py +1 -0
  177. maxframe/tensor/reduction/nanvar.py +2 -0
  178. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  179. maxframe/tensor/reduction/var.py +2 -0
  180. maxframe/tensor/statistics/quantile.py +2 -2
  181. maxframe/tensor/utils.py +2 -22
  182. maxframe/tests/test_protocol.py +34 -0
  183. maxframe/tests/test_utils.py +0 -12
  184. maxframe/tests/utils.py +17 -2
  185. maxframe/typing_.py +4 -1
  186. maxframe/udf.py +62 -3
  187. maxframe/utils.py +112 -86
  188. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +4 -4
  189. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
  190. maxframe_client/__init__.py +0 -1
  191. maxframe_client/clients/framedriver.py +4 -1
  192. maxframe_client/fetcher.py +123 -54
  193. maxframe_client/session/consts.py +3 -0
  194. maxframe_client/session/graph.py +8 -2
  195. maxframe_client/session/odps.py +223 -40
  196. maxframe_client/session/task.py +108 -80
  197. maxframe_client/tests/test_fetcher.py +21 -3
  198. maxframe_client/tests/test_session.py +136 -8
  199. maxframe/core/entity/chunks.py +0 -68
  200. maxframe/core/entity/fuse.py +0 -73
  201. maxframe/core/graph/builder/chunk.py +0 -430
  202. maxframe/odpsio/tableio.py +0 -300
  203. maxframe/odpsio/volumeio.py +0 -95
  204. maxframe_client/clients/spe.py +0 -104
  205. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  206. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  207. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  208. /maxframe/tensor/{base → misc}/astype.py +0 -0
  209. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  210. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  211. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  212. /maxframe/tensor/{base → misc}/where.py +0 -0
  213. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +0 -0
  214. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,300 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import os
16
- import time
17
- from abc import ABC, abstractmethod
18
- from contextlib import contextmanager
19
- from typing import List, Optional, Union
20
-
21
- import pyarrow as pa
22
- from odps import ODPS
23
- from odps.apis.storage_api import (
24
- StorageApiArrowClient,
25
- TableBatchScanResponse,
26
- TableBatchWriteResponse,
27
- )
28
- from odps.types import PartitionSpec
29
-
30
- from ..env import ODPS_STORAGE_API_ENDPOINT
31
- from .schema import odps_schema_to_arrow_schema
32
-
33
- PartitionsType = Union[List[str], str, None]
34
-
35
- _DEFAULT_ROW_BATCH_SIZE = 4096
36
-
37
-
38
class MCTableIO(ABC):
    """Abstract interface for table readers and writers backed by an ODPS entry.

    Concrete subclasses provide :meth:`open_reader` and :meth:`open_writer`;
    this base class only stores the ODPS entry object they work with.
    """

    def __init__(self, odps: ODPS):
        # Entry object kept for use by subclass implementations.
        self._odps = odps

    @abstractmethod
    def open_reader(
        self,
        full_table_name: str,
        partitions: PartitionsType = None,
        columns: Optional[List[str]] = None,
        start: Optional[int] = None,
        count: Optional[int] = None,
    ):
        """Open a reader on ``full_table_name``; subclasses must override."""
        raise NotImplementedError

    @abstractmethod
    def open_writer(
        self,
        full_table_name: str,
        partition: Optional[str] = None,
        overwrite: bool = True,
    ):
        """Open a writer on ``full_table_name``; subclasses must override."""
        raise NotImplementedError
61
-
62
-
63
class HaloTableArrowReader:
    """Pull Arrow record batches out of a storage API read session.

    The reader works in one of two modes, selected by the constructor
    arguments:

    * split scan (``start`` is None): one ``ReadRowsRequest`` is issued per
      split index until ``scan_info.split_count`` splits have been opened;
    * range scan (``start`` is given): rows are requested in windows of at
      most ``row_batch_size`` rows, beginning at ``start``, until ``count``
      rows have been requested.
    """

    def __init__(
        self,
        client: StorageApiArrowClient,
        scan_info: TableBatchScanResponse,
        schema: pa.Schema,
        start: Optional[int] = None,
        count: Optional[int] = None,
        row_batch_size: Optional[int] = None,
    ):
        """
        Parameters
        ----------
        client
            Storage API client used to issue ``read_rows_arrow`` calls.
        scan_info
            Read session response; ``session_id`` and ``split_count`` are used.
        schema
            Arrow schema used to build an empty table when nothing is read.
        start, count
            Row window for a range scan. ``count`` is required when ``start``
            is given.
        row_batch_size
            Maximum rows per range-scan request. ``None`` falls back to the
            module default ``_DEFAULT_ROW_BATCH_SIZE``.

        Raises
        ------
        ValueError
            If ``start`` is given without ``count``.
        """
        if start is not None and count is None:
            # Previously this surfaced as a TypeError on the first read.
            raise ValueError("count must be specified when start is specified")

        self._client = client
        self._scan_info = scan_info

        # Index of the split most recently opened; -1 means none yet.
        self._cur_split_id = -1
        self._cur_reader = None

        self._schema = schema

        self._start = start
        self._count = count
        # Number of rows already requested in range-scan mode.
        self._cursor = 0
        # A None batch size used to reach min(None, int) and raise TypeError.
        self._row_batch_size = (
            row_batch_size if row_batch_size is not None else _DEFAULT_ROW_BATCH_SIZE
        )

    @property
    def count(self) -> Optional[int]:
        """Number of rows of the requested range, or None for split scans."""
        return self._count

    def _open_next_reader(self):
        """Open the reader for the next split / row window.

        Sets ``self._cur_reader`` to None when there is nothing left to read.
        """
        from odps.apis.storage_api import ReadRowsRequest

        next_split = self._cur_split_id + 1
        if 0 <= self._scan_info.split_count <= next_split:
            # scan by split: every split has been opened already
            self._cur_reader = None
            return
        if self._count is not None and self._cursor >= self._count:
            # scan by range: the whole window has been requested already
            self._cur_reader = None
            return

        read_rows_kw = {}
        if self._start is not None:
            remaining = self._count - self._cursor
            read_rows_kw["row_index"] = self._start + self._cursor
            read_rows_kw["row_count"] = min(self._row_batch_size, remaining)
            self._cursor = min(self._count, self._cursor + self._row_batch_size)

        req = ReadRowsRequest(
            session_id=self._scan_info.session_id,
            split_index=next_split,
            **read_rows_kw,
        )
        self._cur_reader = self._client.read_rows_arrow(req)
        self._cur_split_id = next_split

    def read(self):
        """Return the next record batch, or None when the data is exhausted."""
        if self._cur_reader is None:
            self._open_next_reader()
        while self._cur_reader is not None:
            batch = self._cur_reader.read()
            if batch is not None:
                return batch
            # Current reader is drained; move on to the next one.
            self._open_next_reader()
        return None

    def read_all(self) -> pa.Table:
        """Read every remaining batch and return them as a single table.

        An empty table built from the reader's schema is returned when no
        batch is available.
        """
        batches = []
        while True:
            batch = self.read()
            if batch is None:
                break
            batches.append(batch)
        if not batches:
            return self._schema.empty_table()
        return pa.Table.from_batches(batches)
140
-
141
-
142
class HaloTableArrowWriter:
    """Write Arrow data into a storage API write session.

    Call :meth:`open` before :meth:`write`, and :meth:`close` once all data
    has been written to obtain the commit message.
    """

    def __init__(
        self, client: StorageApiArrowClient, write_info: TableBatchWriteResponse
    ):
        self._client = client
        self._write_info = write_info

        # Underlying stream writer, created by open().
        self._writer = None

    def open(self):
        """Create the underlying writer for the session in ``write_info``."""
        from odps.apis.storage_api import WriteRowsRequest

        request = WriteRowsRequest(self._write_info.session_id)
        self._writer = self._client.write_rows_arrow(request)

    def write(self, batch):
        """Write a record batch, or every batch of a ``pa.Table``."""
        if isinstance(batch, pa.Table):
            pieces = batch.to_batches()
        else:
            pieces = [batch]
        for piece in pieces:
            self._writer.write(piece)

    def close(self):
        """Finish the writer and return its commit message.

        Raises
        ------
        IOError
            If the writer reports that finishing did not succeed; the commit
            message is used as the error text.
        """
        commit_msg, is_success = self._writer.finish()
        if is_success:
            return commit_msg
        raise IOError(commit_msg)
170
-
171
-
172
class HaloTableIO(MCTableIO):
    """Table IO implemented on top of ``StorageApiArrowClient``.

    ``open_reader`` and ``open_writer`` are context managers yielding a
    ``HaloTableArrowReader`` / ``HaloTableArrowWriter`` respectively.
    """

    # REST endpoint handed to StorageApiArrowClient; read from the environment
    # once, when the class is created.
    _storage_api_endpoint = os.getenv(ODPS_STORAGE_API_ENDPOINT)

    @staticmethod
    def _convert_partitions(partitions: PartitionsType) -> List[str]:
        """Normalize partition specs into ``k1=v1/k2=v2`` strings.

        ``None`` yields an empty list; a single spec is treated as a
        one-element list.
        """
        if partitions is None:
            return []
        if isinstance(partitions, (str, PartitionSpec)):
            partitions = [partitions]
        return [
            "/".join(f"{k}={v}" for k, v in PartitionSpec(pt).items())
            for pt in partitions
        ]

    @staticmethod
    def _resolve_row_range(
        start: Optional[int],
        stop: Optional[int],
        reverse_range: bool,
        record_count: int,
    ):
        """Translate slice-like bounds into a ``(first_row, row_count)`` pair.

        Negative bounds supplied by the caller are counted from the end of
        the table. They are resolved *before* defaults are filled in, so the
        reverse-range default ``stop`` of ``-1`` (meaning "just before row 0")
        is not mistaken for "the last row".
        """
        if start is not None and start < 0:
            start += record_count
        if stop is not None and stop < 0:
            stop += record_count

        if reverse_range:
            upper = record_count - 1 if start is None else start
            lower = -1 if stop is None else stop
            # Rows lower+1 .. upper are covered, read in forward order.
            return lower + 1, max(upper - lower, 0)

        first = 0 if start is None else start
        last = record_count if stop is None else stop
        return first, max(last - first, 0)

    @contextmanager
    def open_reader(
        self,
        full_table_name: str,
        partitions: PartitionsType = None,
        columns: Optional[List[str]] = None,
        partition_columns: Union[None, bool, List[str]] = None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
        reverse_range: bool = False,
        row_batch_size: int = _DEFAULT_ROW_BATCH_SIZE,
    ):
        """Create a read session and yield a ``HaloTableArrowReader`` on it.

        Parameters
        ----------
        full_table_name
            Name passed to ``ODPS.get_table``.
        partitions
            Partition spec(s) to read; ``None`` means no partition filter.
        columns
            Data columns to read; defaults to all non-partition columns.
        partition_columns
            ``True`` selects every partition column; any other value is
            passed through to the scan request unchanged.
        start, stop
            Optional slice-like row bounds. Giving either one switches the
            session to row-offset splitting. Negative values count from the
            end of the table.
        reverse_range
            Interpret ``start`` / ``stop`` as bounds of a reversed slice
            (``start`` is the highest row, ``stop`` is exclusive below).
        row_batch_size
            Maximum number of rows per request for row-offset reads.

        Raises
        ------
        IOError
            If the read session does not reach the OK status.
        """
        from odps.apis.storage_api import (
            SessionRequest,
            SplitOptions,
            Status,
            TableBatchScanRequest,
        )

        table = self._odps.get_table(full_table_name)
        client = StorageApiArrowClient(
            self._odps, table, rest_endpoint=self._storage_api_endpoint
        )

        by_row_range = start is not None or stop is not None
        split_option = (
            SplitOptions.SplitMode.ROW_OFFSET
            if by_row_range
            else SplitOptions.SplitMode.SIZE
        )

        scan_kw = {
            "required_partitions": self._convert_partitions(partitions),
            "split_options": SplitOptions.get_default_options(split_option),
        }
        columns = columns or [c.name for c in table.table_schema.simple_columns]
        scan_kw["required_data_columns"] = columns
        if partition_columns is True:
            scan_kw["required_partition_columns"] = [
                c.name for c in table.table_schema.partitions
            ]
        else:
            scan_kw["required_partition_columns"] = partition_columns

        # todo add more options for partition column handling
        req = TableBatchScanRequest(**scan_kw)
        resp = client.create_read_session(req)

        session_id = resp.session_id
        while resp.status == Status.WAIT:
            # Sleep before polling so no time is wasted once the session is ready.
            time.sleep(1.0)
            resp = client.get_read_session(SessionRequest(session_id))

        if resp.status != Status.OK:
            # Explicit error instead of ``assert``, which is stripped under -O.
            raise IOError(
                f"Read session {session_id} of table {full_table_name} "
                f"ended with status {resp.status}"
            )

        count = None
        if by_row_range:
            start, count = self._resolve_row_range(
                start, stop, reverse_range, resp.record_count
            )

        yield HaloTableArrowReader(
            client,
            resp,
            schema=odps_schema_to_arrow_schema(table.table_schema),
            start=start,
            count=count,
            row_batch_size=row_batch_size,
        )

    @contextmanager
    def open_writer(
        self,
        full_table_name: str,
        partition: Optional[str] = None,
        overwrite: bool = True,
    ):
        """Create a write session, yield a writer, then commit the session.

        The commit only happens when the ``with`` body finishes without
        raising.

        Raises
        ------
        IOError
            If the writer fails to finish, or the session does not reach the
            COMMITTED status.
        """
        from odps.apis.storage_api import (
            SessionRequest,
            SessionStatus,
            TableBatchWriteRequest,
        )

        table = self._odps.get_table(full_table_name)
        client = StorageApiArrowClient(
            self._odps, table, rest_endpoint=self._storage_api_endpoint
        )

        part_strs = self._convert_partitions(partition)
        part_str = part_strs[0] if part_strs else None
        req = TableBatchWriteRequest(partition_spec=part_str, overwrite=overwrite)
        resp = client.create_write_session(req)

        session_id = resp.session_id
        writer = HaloTableArrowWriter(client, resp)
        writer.open()

        yield writer

        commit_msg = writer.close()
        resp = client.commit_write_session(
            SessionRequest(session_id=session_id), [commit_msg]
        )
        while resp.session_status == SessionStatus.COMMITTING:
            # Pause between polls instead of busy-looping against the service.
            time.sleep(1.0)
            resp = client.get_write_session(SessionRequest(session_id=session_id))

        if resp.session_status != SessionStatus.COMMITTED:
            # Explicit error instead of ``assert``, which is stripped under -O.
            raise IOError(
                f"Write session {session_id} of table {full_table_name} "
                f"ended with status {resp.session_status}"
            )
@@ -1,95 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from typing import List, Optional
16
-
17
- from odps import ODPS
18
- from odps.models import ExternalVolume, PartedVolume
19
- from odps.tunnel.volumetunnel import VolumeTunnel
20
-
21
-
22
class ODPSVolumeReader:
    """Read files from one directory of an ODPS volume."""

    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
        self._odps_entry = odps_entry
        self._volume = odps_entry.get_volume(volume_name)
        self._volume_dir = volume_dir

    def list_files(self) -> List[str]:
        """Return the names of the files under the volume directory."""

        def _get_file_name(vol_file):
            # Some listing entries expose ``name``; others only have ``path``,
            # whose last component is used instead.
            if hasattr(vol_file, "name"):
                return vol_file.name
            return vol_file.path.rsplit("/", 1)[-1]

        dir_path = f"/{self._volume.name}/{self._volume_dir}"
        return [
            _get_file_name(f) for f in self._odps_entry.list_volume_files(dir_path)
        ]

    def read_file(self, file_name: str) -> bytes:
        """Return the content of ``file_name`` inside the volume directory.

        Raises
        ------
        TypeError
            If the volume is neither a ``PartedVolume`` nor an
            ``ExternalVolume``. Previously ``None`` was returned silently in
            that case, contradicting the ``bytes`` return annotation.
        """
        if isinstance(self._volume, PartedVolume):
            vol_tunnel = VolumeTunnel(self._odps_entry)
            session = vol_tunnel.create_download_session(
                self._volume, self._volume_dir, file_name
            )
            with session.open() as reader:
                return reader.read()
        if isinstance(self._volume, ExternalVolume):
            with self._volume.open_reader(self._volume_dir + "/" + file_name) as reader:
                return reader.read()
        raise TypeError(
            f"Unsupported volume type {type(self._volume).__name__} for reading"
        )
52
-
53
-
54
class ODPSVolumeWriter:
    """Write files into one directory of an ODPS volume.

    For a ``PartedVolume`` files are written through a volume tunnel upload
    session and become part of the session passed to :meth:`commit`. For an
    ``ExternalVolume`` files are written directly and :meth:`commit` does
    nothing.
    """

    def __init__(self, odps_entry: ODPS, volume_name: str, volume_dir: str):
        self._odps_entry = odps_entry
        self._volume = odps_entry.get_volume(volume_name)
        self._volume_dir = volume_dir
        # Upload session reused by calls that pass no session id or its id.
        self._session_cache = None

    def create_write_session(self) -> Optional[str]:
        """Create an upload session and return its id.

        Returns ``None`` when the volume is not a ``PartedVolume``.
        """
        if not isinstance(self._volume, PartedVolume):
            return None
        vol_tunnel = VolumeTunnel(self._odps_entry)
        session = self._session_cache = vol_tunnel.create_upload_session(
            self._volume, self._volume_dir
        )
        return session.id

    def _get_existing_upload_session(self, write_session_id: Optional[str]):
        """Return the upload session for ``write_session_id``.

        The cached session is returned when no id is given or the id matches
        it. Otherwise a session is obtained from the volume tunnel. That
        session is remembered only when nothing is cached yet, so that
        ``write_file`` and ``commit`` calls made without an id share one
        session instead of each getting a different one.
        """
        cached = self._session_cache
        if cached is not None and (
            write_session_id is None or write_session_id == cached.id
        ):
            return cached
        vol_tunnel = VolumeTunnel(self._odps_entry)
        session = vol_tunnel.create_upload_session(
            self._volume, self._volume_dir, write_session_id
        )
        if cached is None:
            self._session_cache = session
        return session

    def write_file(
        self, file_name: str, data: bytes, write_session_id: Optional[str] = None
    ):
        """Write ``data`` as ``file_name`` inside the volume directory.

        Raises
        ------
        TypeError
            If the volume is neither a ``PartedVolume`` nor an
            ``ExternalVolume``. Previously the data was dropped silently in
            that case.
        """
        if isinstance(self._volume, PartedVolume):
            session = self._get_existing_upload_session(write_session_id)
            with session.open(file_name) as writer:
                writer.write(data)
        elif isinstance(self._volume, ExternalVolume):
            with self._volume.open_writer(self._volume_dir + "/" + file_name) as writer:
                writer.write(data)
        else:
            raise TypeError(
                f"Unsupported volume type {type(self._volume).__name__} for writing"
            )

    def commit(self, files: List[str], write_session_id: Optional[str] = None):
        """Commit ``files`` in the upload session; no-op unless PartedVolume."""
        if not isinstance(self._volume, PartedVolume):
            return None
        session = self._get_existing_upload_session(write_session_id)
        session.commit(files)
@@ -1,104 +0,0 @@
1
- # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from typing import Any, Dict, Optional
16
-
17
- from tornado import httpclient
18
-
19
- from maxframe.core import TileableGraph
20
- from maxframe.protocol import ExecuteSubDagRequest, ProtocolBody, SubDagInfo
21
- from maxframe.typing_ import TimeoutType
22
- from maxframe.utils import (
23
- deserialize_serializable,
24
- format_timeout_params,
25
- serialize_serializable,
26
- wait_http_response,
27
- )
28
-
29
-
30
class SPEClient:
    """Asynchronous HTTP client for the ``/api/subdags`` endpoints."""

    def __init__(
        self,
        endpoint: str,
        session_id: Optional[str] = None,
        host: Optional[str] = None,
    ):
        """
        Parameters
        ----------
        endpoint
            Base URL of the service; trailing slashes are removed.
        session_id
            When given, appended as ``session_id`` query argument on submit.
        host
            Value for the ``Host`` request header. The header is omitted when
            no host is given, instead of being sent with a ``None`` value.
        """
        self._endpoint = endpoint.rstrip("/")
        self._session_id = session_id
        self._headers = {"Host": host} if host is not None else {}

    @staticmethod
    def _load_subdag_info(resp: httpclient.HTTPResponse) -> SubDagInfo:
        """Deserialize the response body and return the wrapped SubDagInfo."""
        res: ProtocolBody[SubDagInfo] = deserialize_serializable(resp.body)
        return res.body

    async def _request_with_wait(
        self, method: str, subdag_id: str, wait_timeout: TimeoutType
    ) -> SubDagInfo:
        """Issue ``method`` on a subdag URL, waiting up to ``wait_timeout``.

        Falls back to a non-waiting ``get_subdag_info`` call on TimeoutError.
        """
        req_url = f"{self._endpoint}/api/subdags/{subdag_id}"
        params = format_timeout_params(wait_timeout)
        try:
            resp = await wait_http_response(
                req_url + params,
                method=method,
                headers=self._headers,
                request_timeout=wait_timeout,
            )
            return self._load_subdag_info(resp)
        except TimeoutError:
            return await self.get_subdag_info(subdag_id)

    async def submit_subdag(
        self, subdag: TileableGraph, settings: Optional[Dict[str, Any]] = None
    ) -> SubDagInfo:
        """POST ``subdag`` with ``settings`` and return the resulting info."""
        req_url = f"{self._endpoint}/api/subdags"
        req_body: ProtocolBody[ExecuteSubDagRequest] = ProtocolBody(
            body=ExecuteSubDagRequest(dag=subdag, settings=settings),
        )

        if self._session_id is not None:
            req_url += f"?session_id={self._session_id}"

        resp = await httpclient.AsyncHTTPClient().fetch(
            req_url,
            method="POST",
            headers=self._headers,
            body=serialize_serializable(req_body),
        )
        return self._load_subdag_info(resp)

    async def get_subdag_info(self, subdag_id: str) -> SubDagInfo:
        """GET the current info of ``subdag_id`` without waiting (``wait=0``)."""
        req_url = f"{self._endpoint}/api/subdags/{subdag_id}?wait=0"
        resp = await httpclient.AsyncHTTPClient().fetch(
            req_url,
            method="GET",
            headers=self._headers,
        )
        return self._load_subdag_info(resp)

    async def wait_subdag(
        self, subdag_id: str, wait_timeout: TimeoutType = None
    ) -> SubDagInfo:
        """GET the info of ``subdag_id``, waiting up to ``wait_timeout``."""
        return await self._request_with_wait("GET", subdag_id, wait_timeout)

    async def cancel_subdag(
        self, subdag_id: str, wait_timeout: TimeoutType = None
    ) -> SubDagInfo:
        """DELETE ``subdag_id`` and return its info, waiting up to ``wait_timeout``."""
        return await self._request_with_wait("DELETE", subdag_id, wait_timeout)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes