maxframe 0.1.0b4__cp39-cp39-win_amd64.whl → 1.0.0__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp39-win_amd64.pyd +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/protocol.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import base64
|
|
16
16
|
import enum
|
|
17
17
|
import uuid
|
|
18
|
-
from typing import Any, Dict, Generic, List, Optional,
|
|
18
|
+
from typing import Any, Dict, Generic, List, Optional, Type, TypeVar
|
|
19
19
|
|
|
20
20
|
import pandas as pd
|
|
21
21
|
|
|
@@ -32,12 +32,12 @@ from .serialization.serializables import (
|
|
|
32
32
|
EnumField,
|
|
33
33
|
FieldTypes,
|
|
34
34
|
Float64Field,
|
|
35
|
+
Int32Field,
|
|
35
36
|
ListField,
|
|
36
37
|
ReferenceField,
|
|
37
38
|
Serializable,
|
|
38
39
|
SeriesField,
|
|
39
40
|
StringField,
|
|
40
|
-
TupleField,
|
|
41
41
|
)
|
|
42
42
|
|
|
43
43
|
pickling_support.install()
|
|
@@ -71,6 +71,9 @@ class DagStatus(enum.Enum):
|
|
|
71
71
|
CANCELLING = 4
|
|
72
72
|
CANCELLED = 5
|
|
73
73
|
|
|
74
|
+
def is_terminated(self):
|
|
75
|
+
return self in (DagStatus.CANCELLED, DagStatus.SUCCEEDED, DagStatus.FAILED)
|
|
76
|
+
|
|
74
77
|
|
|
75
78
|
class DimensionIndex(Serializable):
|
|
76
79
|
is_slice: bool = BoolField("is_slice", default=None)
|
|
@@ -88,19 +91,6 @@ class DataSerializeType(enum.Enum):
|
|
|
88
91
|
PICKLE = 0
|
|
89
92
|
|
|
90
93
|
|
|
91
|
-
class VolumeDataMeta(Serializable):
|
|
92
|
-
output_type: OutputType = EnumField(
|
|
93
|
-
"output_type", OutputType, FieldTypes.int8, default=None
|
|
94
|
-
)
|
|
95
|
-
serial_type: DataSerializeType = EnumField(
|
|
96
|
-
"serial_type", DataSerializeType, FieldTypes.int8, default=None
|
|
97
|
-
)
|
|
98
|
-
shape: Tuple[int, ...] = TupleField("shape", FieldTypes.int64, default=None)
|
|
99
|
-
nsplits: Tuple[Tuple[int, ...], ...] = TupleField(
|
|
100
|
-
"nsplits", FieldTypes.tuple(FieldTypes.tuple(FieldTypes.int64)), default=None
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
|
|
104
94
|
_result_type_to_info_cls: Dict[ResultType, Type["ResultInfo"]] = dict()
|
|
105
95
|
|
|
106
96
|
|
|
@@ -150,6 +140,9 @@ class ODPSTableResultInfo(ResultInfo):
|
|
|
150
140
|
partition_specs: Optional[List[str]] = ListField(
|
|
151
141
|
"partition_specs", FieldTypes.string, default=None
|
|
152
142
|
)
|
|
143
|
+
table_meta: Optional["DataFrameTableMeta"] = ReferenceField(
|
|
144
|
+
"table_meta", default=None
|
|
145
|
+
)
|
|
153
146
|
|
|
154
147
|
def __init__(self, result_type: ResultType = None, **kw):
|
|
155
148
|
result_type = result_type or ResultType.ODPS_TABLE
|
|
@@ -160,8 +153,17 @@ class ODPSTableResultInfo(ResultInfo):
|
|
|
160
153
|
ret["full_table_name"] = self.full_table_name
|
|
161
154
|
if self.partition_specs:
|
|
162
155
|
ret["partition_specs"] = self.partition_specs
|
|
156
|
+
if self.table_meta:
|
|
157
|
+
ret["table_meta"] = self.table_meta.to_json()
|
|
163
158
|
return ret
|
|
164
159
|
|
|
160
|
+
@classmethod
|
|
161
|
+
def _json_to_kwargs(cls, serialized: dict) -> dict:
|
|
162
|
+
kw = super()._json_to_kwargs(serialized)
|
|
163
|
+
if "table_meta" in kw:
|
|
164
|
+
kw["table_meta"] = DataFrameTableMeta.from_json(kw["table_meta"])
|
|
165
|
+
return kw
|
|
166
|
+
|
|
165
167
|
|
|
166
168
|
class ODPSVolumeResultInfo(ResultInfo):
|
|
167
169
|
_result_type = ResultType.ODPS_VOLUME
|
|
@@ -190,9 +192,9 @@ class ErrorInfo(JsonSerializable):
|
|
|
190
192
|
"error_tracebacks", FieldTypes.list
|
|
191
193
|
)
|
|
192
194
|
raw_error_source: ErrorSource = EnumField(
|
|
193
|
-
"raw_error_source", ErrorSource, FieldTypes.int8
|
|
195
|
+
"raw_error_source", ErrorSource, FieldTypes.int8, default=None
|
|
194
196
|
)
|
|
195
|
-
raw_error_data: Optional[Exception] = AnyField("raw_error_data")
|
|
197
|
+
raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
|
|
196
198
|
|
|
197
199
|
@classmethod
|
|
198
200
|
def from_exception(cls, exc: Exception):
|
|
@@ -201,20 +203,29 @@ class ErrorInfo(JsonSerializable):
|
|
|
201
203
|
return cls(messages, tracebacks, ErrorSource.PYTHON, exc)
|
|
202
204
|
|
|
203
205
|
def reraise(self):
|
|
204
|
-
if
|
|
206
|
+
if (
|
|
207
|
+
self.raw_error_source == ErrorSource.PYTHON
|
|
208
|
+
and self.raw_error_data is not None
|
|
209
|
+
):
|
|
205
210
|
raise self.raw_error_data
|
|
206
211
|
raise RemoteException(self.error_messages, self.error_tracebacks, [])
|
|
207
212
|
|
|
208
213
|
@classmethod
|
|
209
214
|
def from_json(cls, serialized: dict) -> "ErrorInfo":
|
|
210
215
|
kw = serialized.copy()
|
|
211
|
-
kw
|
|
216
|
+
if kw.get("raw_error_source") is not None:
|
|
217
|
+
kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
|
|
218
|
+
else:
|
|
219
|
+
kw["raw_error_source"] = None
|
|
220
|
+
|
|
212
221
|
if kw.get("raw_error_data"):
|
|
213
222
|
bufs = [base64.b64decode(s) for s in kw["raw_error_data"]]
|
|
214
223
|
try:
|
|
215
224
|
kw["raw_error_data"] = pickle.loads(bufs[0], buffers=bufs[1:])
|
|
216
225
|
except:
|
|
217
|
-
|
|
226
|
+
# both error source and data shall be None to make sure
|
|
227
|
+
# RemoteException is raised.
|
|
228
|
+
kw["raw_error_source"] = kw["raw_error_data"] = None
|
|
218
229
|
return cls(**kw)
|
|
219
230
|
|
|
220
231
|
def to_json(self) -> dict:
|
|
@@ -227,7 +238,12 @@ class ErrorInfo(JsonSerializable):
|
|
|
227
238
|
if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
|
|
228
239
|
err_data_bufs = self.raw_error_data.get_buffers()
|
|
229
240
|
elif isinstance(self.raw_error_data, BaseException):
|
|
230
|
-
|
|
241
|
+
try:
|
|
242
|
+
err_data_bufs = pickle_buffers(self.raw_error_data)
|
|
243
|
+
except:
|
|
244
|
+
err_data_bufs = None
|
|
245
|
+
ret["raw_error_source"] = None
|
|
246
|
+
|
|
231
247
|
if err_data_bufs:
|
|
232
248
|
ret["raw_error_data"] = [
|
|
233
249
|
base64.b64encode(s).decode() for s in err_data_bufs
|
|
@@ -249,9 +265,17 @@ class DagInfo(JsonSerializable):
|
|
|
249
265
|
error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
|
|
250
266
|
start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
|
|
251
267
|
end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
|
|
268
|
+
subdag_infos: Dict[str, "SubDagInfo"] = DictField(
|
|
269
|
+
"subdag_infos",
|
|
270
|
+
key_type=FieldTypes.string,
|
|
271
|
+
value_type=FieldTypes.reference,
|
|
272
|
+
default_factory=dict,
|
|
273
|
+
)
|
|
252
274
|
|
|
253
275
|
@classmethod
|
|
254
|
-
def from_json(cls, serialized: dict) -> "DagInfo":
|
|
276
|
+
def from_json(cls, serialized: dict) -> Optional["DagInfo"]:
|
|
277
|
+
if serialized is None:
|
|
278
|
+
return None
|
|
255
279
|
kw = serialized.copy()
|
|
256
280
|
kw["status"] = DagStatus(kw["status"])
|
|
257
281
|
if kw.get("tileable_to_result_infos"):
|
|
@@ -261,6 +285,10 @@ class DagInfo(JsonSerializable):
|
|
|
261
285
|
}
|
|
262
286
|
if kw.get("error_info"):
|
|
263
287
|
kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
|
|
288
|
+
if kw.get("subdag_infos"):
|
|
289
|
+
kw["subdag_infos"] = {
|
|
290
|
+
k: SubDagInfo.from_json(v) for k, v in kw["subdag_infos"].items()
|
|
291
|
+
}
|
|
264
292
|
return DagInfo(**kw)
|
|
265
293
|
|
|
266
294
|
def to_json(self) -> dict:
|
|
@@ -279,6 +307,8 @@ class DagInfo(JsonSerializable):
|
|
|
279
307
|
}
|
|
280
308
|
if self.error_info:
|
|
281
309
|
ret["error_info"] = self.error_info.to_json()
|
|
310
|
+
if self.subdag_infos:
|
|
311
|
+
ret["subdag_infos"] = {k: v.to_json() for k, v in self.subdag_infos.items()}
|
|
282
312
|
return ret
|
|
283
313
|
|
|
284
314
|
|
|
@@ -302,7 +332,9 @@ class SessionInfo(JsonSerializable):
|
|
|
302
332
|
error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
|
|
303
333
|
|
|
304
334
|
@classmethod
|
|
305
|
-
def from_json(cls, serialized: dict) -> "SessionInfo":
|
|
335
|
+
def from_json(cls, serialized: dict) -> Optional["SessionInfo"]:
|
|
336
|
+
if serialized is None:
|
|
337
|
+
return None
|
|
306
338
|
kw = serialized.copy()
|
|
307
339
|
if kw.get("dag_infos"):
|
|
308
340
|
kw["dag_infos"] = {
|
|
@@ -320,7 +352,10 @@ class SessionInfo(JsonSerializable):
|
|
|
320
352
|
"idle_timestamp": self.idle_timestamp,
|
|
321
353
|
}
|
|
322
354
|
if self.dag_infos:
|
|
323
|
-
ret["dag_infos"] = {
|
|
355
|
+
ret["dag_infos"] = {
|
|
356
|
+
k: v.to_json() if v is not None else None
|
|
357
|
+
for k, v in self.dag_infos.items()
|
|
358
|
+
}
|
|
324
359
|
if self.error_info:
|
|
325
360
|
ret["error_info"] = self.error_info.to_json()
|
|
326
361
|
return ret
|
|
@@ -340,9 +375,32 @@ class ExecuteDagRequest(Serializable):
|
|
|
340
375
|
value_type=FieldTypes.reference,
|
|
341
376
|
default=None,
|
|
342
377
|
)
|
|
378
|
+
new_settings: Dict[str, Any] = DictField(
|
|
379
|
+
"new_settings",
|
|
380
|
+
key_type=FieldTypes.string,
|
|
381
|
+
default=None,
|
|
382
|
+
)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class SubDagSubmitInstanceInfo(JsonSerializable):
|
|
386
|
+
submit_reason: str = StringField("submit_reason")
|
|
387
|
+
instance_id: str = StringField("instance_id")
|
|
388
|
+
subquery_id: Optional[int] = Int32Field("subquery_id", default=None)
|
|
389
|
+
|
|
390
|
+
@classmethod
|
|
391
|
+
def from_json(cls, serialized: dict) -> "SubDagSubmitInstanceInfo":
|
|
392
|
+
return SubDagSubmitInstanceInfo(**serialized)
|
|
393
|
+
|
|
394
|
+
def to_json(self) -> dict:
|
|
395
|
+
ret = {
|
|
396
|
+
"submit_reason": self.submit_reason,
|
|
397
|
+
"instance_id": self.instance_id,
|
|
398
|
+
"subquery_id": self.subquery_id,
|
|
399
|
+
}
|
|
400
|
+
return ret
|
|
343
401
|
|
|
344
402
|
|
|
345
|
-
class SubDagInfo(Serializable):
|
|
403
|
+
class SubDagInfo(JsonSerializable):
|
|
346
404
|
subdag_id: str = StringField("subdag_id")
|
|
347
405
|
status: DagStatus = EnumField("status", DagStatus, FieldTypes.int8, default=None)
|
|
348
406
|
progress: float = Float64Field("progress", default=None)
|
|
@@ -355,9 +413,52 @@ class SubDagInfo(Serializable):
|
|
|
355
413
|
FieldTypes.reference,
|
|
356
414
|
default_factory=dict,
|
|
357
415
|
)
|
|
416
|
+
start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
|
|
417
|
+
end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
|
|
418
|
+
submit_instances: List[SubDagSubmitInstanceInfo] = ListField(
|
|
419
|
+
"submit_instances",
|
|
420
|
+
FieldTypes.reference,
|
|
421
|
+
default_factory=list,
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
@classmethod
|
|
425
|
+
def from_json(cls, serialized: dict) -> "SubDagInfo":
|
|
426
|
+
kw = serialized.copy()
|
|
427
|
+
kw["status"] = DagStatus(kw["status"])
|
|
428
|
+
if kw.get("tileable_to_result_infos"):
|
|
429
|
+
kw["tileable_to_result_infos"] = {
|
|
430
|
+
k: ResultInfo.from_json(s)
|
|
431
|
+
for k, s in kw["tileable_to_result_infos"].items()
|
|
432
|
+
}
|
|
433
|
+
if kw.get("error_info"):
|
|
434
|
+
kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
|
|
435
|
+
if kw.get("submit_instances"):
|
|
436
|
+
kw["submit_instances"] = [
|
|
437
|
+
SubDagSubmitInstanceInfo.from_json(s) for s in kw["submit_instances"]
|
|
438
|
+
]
|
|
439
|
+
return SubDagInfo(**kw)
|
|
440
|
+
|
|
441
|
+
def to_json(self) -> dict:
|
|
442
|
+
ret = {
|
|
443
|
+
"subdag_id": self.subdag_id,
|
|
444
|
+
"status": self.status.value,
|
|
445
|
+
"progress": self.progress,
|
|
446
|
+
"start_timestamp": self.start_timestamp,
|
|
447
|
+
"end_timestamp": self.end_timestamp,
|
|
448
|
+
}
|
|
449
|
+
if self.error_info:
|
|
450
|
+
ret["error_info"] = self.error_info.to_json()
|
|
451
|
+
if self.tileable_to_result_infos:
|
|
452
|
+
ret["tileable_to_result_infos"] = {
|
|
453
|
+
k: v.to_json() for k, v in self.tileable_to_result_infos.items()
|
|
454
|
+
}
|
|
455
|
+
if self.submit_instances:
|
|
456
|
+
ret["submit_instances"] = [i.to_json() for i in self.submit_instances]
|
|
457
|
+
return ret
|
|
358
458
|
|
|
359
459
|
|
|
360
460
|
class ExecuteSubDagRequest(Serializable):
|
|
461
|
+
subdag_id: str = StringField("subdag_id")
|
|
361
462
|
dag: TileableGraph = ReferenceField(
|
|
362
463
|
"dag",
|
|
363
464
|
on_serialize=SerializableGraph.from_graph,
|
|
@@ -371,7 +472,7 @@ class DecrefRequest(Serializable):
|
|
|
371
472
|
keys: List[str] = ListField("keys", FieldTypes.string, default=None)
|
|
372
473
|
|
|
373
474
|
|
|
374
|
-
class DataFrameTableMeta(Serializable):
|
|
475
|
+
class DataFrameTableMeta(JsonSerializable):
|
|
375
476
|
__slots__ = "_pd_column_names", "_pd_index_level_names"
|
|
376
477
|
|
|
377
478
|
table_name: Optional[str] = StringField("table_name", default=None)
|
|
@@ -402,7 +503,7 @@ class DataFrameTableMeta(Serializable):
|
|
|
402
503
|
self._pd_index_level_names = self.pd_index_dtypes.index.tolist()
|
|
403
504
|
return self._pd_index_level_names
|
|
404
505
|
|
|
405
|
-
def __eq__(self, other: "
|
|
506
|
+
def __eq__(self, other: "DataFrameTableMeta") -> bool:
|
|
406
507
|
if not isinstance(other, type(self)):
|
|
407
508
|
return False
|
|
408
509
|
for k in self._FIELDS:
|
|
@@ -413,3 +514,29 @@ class DataFrameTableMeta(Serializable):
|
|
|
413
514
|
if not is_same:
|
|
414
515
|
return False
|
|
415
516
|
return True
|
|
517
|
+
|
|
518
|
+
def to_json(self) -> dict:
|
|
519
|
+
b64_pk = lambda x: base64.b64encode(pickle.dumps(x)).decode()
|
|
520
|
+
ret = {
|
|
521
|
+
"table_name": self.table_name,
|
|
522
|
+
"type": self.type.value,
|
|
523
|
+
"table_column_names": self.table_column_names,
|
|
524
|
+
"table_index_column_names": self.table_index_column_names,
|
|
525
|
+
"pd_column_dtypes": b64_pk(self.pd_column_dtypes),
|
|
526
|
+
"pd_column_level_names": b64_pk(self.pd_column_level_names),
|
|
527
|
+
"pd_index_dtypes": b64_pk(self.pd_index_dtypes),
|
|
528
|
+
}
|
|
529
|
+
return ret
|
|
530
|
+
|
|
531
|
+
@classmethod
|
|
532
|
+
def from_json(cls, serialized: dict) -> "DataFrameTableMeta":
|
|
533
|
+
b64_upk = lambda x: pickle.loads(base64.b64decode(x))
|
|
534
|
+
serialized.update(
|
|
535
|
+
{
|
|
536
|
+
"type": OutputType(serialized["type"]),
|
|
537
|
+
"pd_column_dtypes": b64_upk(serialized["pd_column_dtypes"]),
|
|
538
|
+
"pd_column_level_names": b64_upk(serialized["pd_column_level_names"]),
|
|
539
|
+
"pd_index_dtypes": b64_upk(serialized["pd_index_dtypes"]),
|
|
540
|
+
}
|
|
541
|
+
)
|
|
542
|
+
return DataFrameTableMeta(**serialized)
|
maxframe/remote/core.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from functools import partial
|
|
16
16
|
|
|
17
17
|
from .. import opcodes
|
|
18
|
-
from ..core import ENTITY_TYPE
|
|
18
|
+
from ..core import ENTITY_TYPE
|
|
19
19
|
from ..core.operator import ObjectOperator, ObjectOperatorMixin
|
|
20
20
|
from ..dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
21
21
|
from ..serialization.serializables import (
|
|
@@ -26,7 +26,7 @@ from ..serialization.serializables import (
|
|
|
26
26
|
ListField,
|
|
27
27
|
)
|
|
28
28
|
from ..tensor.core import TENSOR_TYPE
|
|
29
|
-
from ..utils import
|
|
29
|
+
from ..utils import find_objects, replace_objects
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
|
|
@@ -63,12 +63,8 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
|
|
|
63
63
|
if raw_inputs is not None:
|
|
64
64
|
for raw_inp in raw_inputs:
|
|
65
65
|
if self._no_prepare(raw_inp):
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
mapping[raw_inp] = next(function_inputs)
|
|
69
|
-
else:
|
|
70
|
-
# in tile, set_inputs from chunk
|
|
71
|
-
mapping[raw_inp] = build_fetch_tileable(raw_inp)
|
|
66
|
+
# not in tile, set_inputs from tileable
|
|
67
|
+
mapping[raw_inp] = next(function_inputs)
|
|
72
68
|
else:
|
|
73
69
|
mapping[raw_inp] = next(function_inputs)
|
|
74
70
|
self.function_args = replace_objects(self.function_args, mapping)
|
|
Binary file
|
maxframe/serialization/core.pxd
CHANGED
|
@@ -18,6 +18,9 @@ from libc.stdint cimport int32_t, uint64_t
|
|
|
18
18
|
cdef class Serializer:
|
|
19
19
|
cdef int _serializer_id
|
|
20
20
|
|
|
21
|
+
cpdef bint is_public_data_exist(self, dict context, object key)
|
|
22
|
+
cpdef put_public_data(self, dict context, object key, object value)
|
|
23
|
+
cpdef get_public_data(self, dict context, object key)
|
|
21
24
|
cpdef serial(self, object obj, dict context)
|
|
22
25
|
cpdef deserial(self, list serialized, dict context, list subs)
|
|
23
26
|
cpdef on_deserial_error(
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from concurrent.futures import Executor
|
|
16
|
+
from typing import Any, Callable, Dict, List, TypeVar
|
|
17
|
+
|
|
18
|
+
def buffered(func: Callable) -> Callable: ...
|
|
19
|
+
def fast_id(obj: Any) -> int: ...
|
|
20
|
+
|
|
21
|
+
LoadType = TypeVar("LoadType")
|
|
22
|
+
|
|
23
|
+
def load_type(class_name: str, parent_class: LoadType) -> LoadType: ...
|
|
24
|
+
|
|
25
|
+
class PickleContainer:
|
|
26
|
+
def __init__(self, buffers: List[bytes]): ...
|
|
27
|
+
def get(self) -> Any: ...
|
|
28
|
+
def get_buffers(self) -> List[bytes]: ...
|
|
29
|
+
|
|
30
|
+
class Serializer:
|
|
31
|
+
serializer_id: int
|
|
32
|
+
def is_public_data_exist(self, context: Dict, key: Any) -> bool: ...
|
|
33
|
+
def put_public_data(self, context: Dict, key: Any, value: Any) -> None: ...
|
|
34
|
+
def get_public_data(self, context: Dict, key: Any) -> Any: ...
|
|
35
|
+
def serial(self, obj: Any, context: Dict): ...
|
|
36
|
+
def deserial(self, serialized: List, context: Dict, subs: List[Any]): ...
|
|
37
|
+
def on_deserial_error(
|
|
38
|
+
self,
|
|
39
|
+
serialized: List,
|
|
40
|
+
context: Dict,
|
|
41
|
+
subs_serialized: List,
|
|
42
|
+
error_index: int,
|
|
43
|
+
exc: BaseException,
|
|
44
|
+
): ...
|
|
45
|
+
@classmethod
|
|
46
|
+
def register(cls, obj_type): ...
|
|
47
|
+
@classmethod
|
|
48
|
+
def unregister(cls, obj_type): ...
|
|
49
|
+
|
|
50
|
+
class Placeholder:
|
|
51
|
+
id: int
|
|
52
|
+
callbacks: List[Callable]
|
|
53
|
+
def __init__(self, id_: int): ...
|
|
54
|
+
def __hash__(self): ...
|
|
55
|
+
def __eq__(self, other): ...
|
|
56
|
+
|
|
57
|
+
def serialize(obj: Any, context: Dict = None): ...
|
|
58
|
+
async def serialize_with_spawn(
|
|
59
|
+
obj: Any,
|
|
60
|
+
context: Dict = None,
|
|
61
|
+
spawn_threshold: int = 100,
|
|
62
|
+
executor: Executor = None,
|
|
63
|
+
): ...
|
|
64
|
+
def deserialize(headers: List, buffers: List, context: Dict = None): ...
|
maxframe/serialization/core.pyx
CHANGED
|
@@ -37,7 +37,7 @@ from .._utils import NamedType
|
|
|
37
37
|
from .._utils cimport TypeDispatcher
|
|
38
38
|
|
|
39
39
|
from ..lib import wrapped_pickle as pickle
|
|
40
|
-
from ..utils import arrow_type_from_str
|
|
40
|
+
from ..utils import NoDefault, arrow_type_from_str, no_default
|
|
41
41
|
|
|
42
42
|
try:
|
|
43
43
|
from pandas import ArrowDtype
|
|
@@ -94,6 +94,7 @@ cdef:
|
|
|
94
94
|
int COMPLEX_SERIALIZER = 12
|
|
95
95
|
int SLICE_SERIALIZER = 13
|
|
96
96
|
int REGEX_SERIALIZER = 14
|
|
97
|
+
int NO_DEFAULT_SERIALIZER = 15
|
|
97
98
|
int PLACEHOLDER_SERIALIZER = 4096
|
|
98
99
|
|
|
99
100
|
|
|
@@ -130,11 +131,30 @@ cdef Serializer get_deserializer(int32_t deserializer_id):
|
|
|
130
131
|
|
|
131
132
|
cdef class Serializer:
|
|
132
133
|
serializer_id = None
|
|
134
|
+
_public_data_context_key = 0x7fffffff - 1
|
|
133
135
|
|
|
134
136
|
def __cinit__(self):
|
|
135
137
|
# make the value can be referenced with C code
|
|
136
138
|
self._serializer_id = self.serializer_id
|
|
137
139
|
|
|
140
|
+
cpdef bint is_public_data_exist(self, dict context, object key):
|
|
141
|
+
cdef dict public_dict = context.get(self._public_data_context_key, None)
|
|
142
|
+
if public_dict is None:
|
|
143
|
+
return False
|
|
144
|
+
return key in public_dict
|
|
145
|
+
|
|
146
|
+
cpdef put_public_data(self, dict context, object key, object value):
|
|
147
|
+
cdef dict public_dict = context.get(self._public_data_context_key, None)
|
|
148
|
+
if public_dict is None:
|
|
149
|
+
public_dict = context[self._public_data_context_key] = {}
|
|
150
|
+
public_dict[key] = value
|
|
151
|
+
|
|
152
|
+
cpdef get_public_data(self, dict context, object key):
|
|
153
|
+
cdef dict public_dict = context.get(self._public_data_context_key, None)
|
|
154
|
+
if public_dict is None:
|
|
155
|
+
return None
|
|
156
|
+
return public_dict.get(key)
|
|
157
|
+
|
|
138
158
|
cpdef serial(self, object obj, dict context):
|
|
139
159
|
"""
|
|
140
160
|
Returns intermediate serialization result of certain object.
|
|
@@ -784,6 +804,16 @@ cdef class RegexSerializer(Serializer):
|
|
|
784
804
|
return re.compile((<bytes>(subs[0])).decode(), serialized[0])
|
|
785
805
|
|
|
786
806
|
|
|
807
|
+
cdef class NoDefaultSerializer(Serializer):
|
|
808
|
+
serializer_id = NO_DEFAULT_SERIALIZER
|
|
809
|
+
|
|
810
|
+
cpdef serial(self, object obj, dict context):
|
|
811
|
+
return [], [], True
|
|
812
|
+
|
|
813
|
+
cpdef deserial(self, list obj, dict context, list subs):
|
|
814
|
+
return no_default
|
|
815
|
+
|
|
816
|
+
|
|
787
817
|
cdef class Placeholder:
|
|
788
818
|
"""
|
|
789
819
|
Placeholder object to reduce duplicated serialization
|
|
@@ -838,6 +868,7 @@ DtypeSerializer.register(ExtensionDtype)
|
|
|
838
868
|
ComplexSerializer.register(complex)
|
|
839
869
|
SliceSerializer.register(slice)
|
|
840
870
|
RegexSerializer.register(re.Pattern)
|
|
871
|
+
NoDefaultSerializer.register(NoDefault)
|
|
841
872
|
PlaceholderSerializer.register(Placeholder)
|
|
842
873
|
|
|
843
874
|
|
|
@@ -993,17 +1024,20 @@ def serialize(obj, dict context = None):
|
|
|
993
1024
|
cdef list subs
|
|
994
1025
|
cdef bint final
|
|
995
1026
|
cdef _IdContextHolder id_context_holder = _IdContextHolder()
|
|
1027
|
+
cdef tuple result
|
|
996
1028
|
|
|
997
1029
|
context = context if context is not None else dict()
|
|
998
1030
|
serialized, subs, final = _serial_single(obj, context, id_context_holder)
|
|
999
1031
|
if final or not subs:
|
|
1000
1032
|
# marked as a leaf node, return directly
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1033
|
+
result = [{}, serialized], subs
|
|
1034
|
+
else:
|
|
1035
|
+
serial_stack.append(_SerialStackItem(serialized, subs))
|
|
1036
|
+
result = _serialize_with_stack(
|
|
1037
|
+
serial_stack, None, context, id_context_holder, result_bufs_list
|
|
1038
|
+
)
|
|
1039
|
+
result[0][0]["_PUB"] = context.get(Serializer._public_data_context_key)
|
|
1040
|
+
return result
|
|
1007
1041
|
|
|
1008
1042
|
|
|
1009
1043
|
async def serialize_with_spawn(
|
|
@@ -1036,31 +1070,38 @@ async def serialize_with_spawn(
|
|
|
1036
1070
|
cdef list subs
|
|
1037
1071
|
cdef bint final
|
|
1038
1072
|
cdef _IdContextHolder id_context_holder = _IdContextHolder()
|
|
1073
|
+
cdef tuple result
|
|
1039
1074
|
|
|
1040
1075
|
context = context if context is not None else dict()
|
|
1041
1076
|
serialized, subs, final = _serial_single(obj, context, id_context_holder)
|
|
1042
1077
|
if final or not subs:
|
|
1043
1078
|
# marked as a leaf node, return directly
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1079
|
+
result = [{}, serialized], subs
|
|
1080
|
+
else:
|
|
1081
|
+
serial_stack.append(_SerialStackItem(serialized, subs))
|
|
1047
1082
|
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1083
|
+
try:
|
|
1084
|
+
result = _serialize_with_stack(
|
|
1085
|
+
serial_stack,
|
|
1086
|
+
None,
|
|
1087
|
+
context,
|
|
1088
|
+
id_context_holder,
|
|
1089
|
+
result_bufs_list,
|
|
1090
|
+
spawn_threshold,
|
|
1091
|
+
)
|
|
1092
|
+
except _SerializeObjectOverflow as ex:
|
|
1093
|
+
result = await asyncio.get_running_loop().run_in_executor(
|
|
1094
|
+
executor,
|
|
1095
|
+
_serialize_with_stack,
|
|
1096
|
+
serial_stack,
|
|
1097
|
+
ex.cur_serialized,
|
|
1098
|
+
context,
|
|
1099
|
+
id_context_holder,
|
|
1100
|
+
result_bufs_list,
|
|
1101
|
+
0,
|
|
1102
|
+
ex.num_total_serialized,
|
|
1103
|
+
)
|
|
1104
|
+
result[0][0]["_PUB"] = context.get(Serializer._public_data_context_key)
|
|
1064
1105
|
return result
|
|
1065
1106
|
|
|
1066
1107
|
|