maxframe 0.1.0b4__cp311-cp311-win32.whl → 1.0.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe_client/session/odps.py
CHANGED
|
@@ -14,28 +14,44 @@
|
|
|
14
14
|
|
|
15
15
|
import abc
|
|
16
16
|
import asyncio
|
|
17
|
+
import copy
|
|
17
18
|
import logging
|
|
18
19
|
import time
|
|
19
20
|
import weakref
|
|
20
21
|
from numbers import Integral
|
|
21
|
-
from typing import Dict, List, Mapping, Optional, Tuple, Union
|
|
22
|
+
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
|
22
23
|
from urllib.parse import urlparse
|
|
23
24
|
|
|
24
25
|
import numpy as np
|
|
25
26
|
import pandas as pd
|
|
26
27
|
from odps import ODPS
|
|
28
|
+
from odps import options as odps_options
|
|
29
|
+
from odps.console import in_ipython_frontend
|
|
27
30
|
|
|
28
31
|
from maxframe.config import options
|
|
29
|
-
from maxframe.core import Entity, TileableGraph, enter_mode
|
|
32
|
+
from maxframe.core import Entity, TileableGraph, build_fetch, enter_mode
|
|
33
|
+
from maxframe.core.operator import Fetch
|
|
30
34
|
from maxframe.dataframe import read_odps_table
|
|
31
35
|
from maxframe.dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
32
36
|
from maxframe.dataframe.datasource import PandasDataSourceOperator
|
|
33
37
|
from maxframe.dataframe.datasource.read_odps_table import DataFrameReadODPSTable
|
|
34
|
-
from maxframe.
|
|
38
|
+
from maxframe.errors import (
|
|
39
|
+
MaxFrameError,
|
|
40
|
+
NoTaskServerResponseError,
|
|
41
|
+
SessionAlreadyClosedError,
|
|
42
|
+
)
|
|
43
|
+
from maxframe.io.objects import get_object_io_handler
|
|
44
|
+
from maxframe.io.odpsio import (
|
|
45
|
+
ODPSTableIO,
|
|
46
|
+
ODPSVolumeWriter,
|
|
47
|
+
pandas_to_arrow,
|
|
48
|
+
pandas_to_odps_schema,
|
|
49
|
+
)
|
|
35
50
|
from maxframe.protocol import (
|
|
36
51
|
DagInfo,
|
|
37
52
|
DagStatus,
|
|
38
53
|
ODPSTableResultInfo,
|
|
54
|
+
ODPSVolumeResultInfo,
|
|
39
55
|
ResultInfo,
|
|
40
56
|
SessionInfo,
|
|
41
57
|
)
|
|
@@ -46,8 +62,15 @@ from maxframe.session import (
|
|
|
46
62
|
Profiling,
|
|
47
63
|
Progress,
|
|
48
64
|
)
|
|
65
|
+
from maxframe.tensor.datasource import ArrayDataSource
|
|
49
66
|
from maxframe.typing_ import TileableType
|
|
50
|
-
from maxframe.utils import
|
|
67
|
+
from maxframe.utils import (
|
|
68
|
+
ToThreadMixin,
|
|
69
|
+
build_session_volume_name,
|
|
70
|
+
build_temp_table_name,
|
|
71
|
+
str_to_bool,
|
|
72
|
+
sync_pyodps_options,
|
|
73
|
+
)
|
|
51
74
|
|
|
52
75
|
from ..clients.framedriver import FrameDriverClient
|
|
53
76
|
from ..fetcher import get_fetcher_cls
|
|
@@ -58,6 +81,45 @@ logger = logging.getLogger(__name__)
|
|
|
58
81
|
|
|
59
82
|
|
|
60
83
|
class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
84
|
+
def get_settings_to_upload(self) -> Dict[str, Any]:
|
|
85
|
+
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
86
|
+
sql_settings.update(options.sql.settings or {})
|
|
87
|
+
|
|
88
|
+
quota_name = options.session.quota_name or getattr(
|
|
89
|
+
odps_options, "quota_name", None
|
|
90
|
+
)
|
|
91
|
+
lifecycle = options.session.table_lifecycle or odps_options.lifecycle
|
|
92
|
+
temp_lifecycle = (
|
|
93
|
+
options.session.temp_table_lifecycle or odps_options.temp_lifecycle
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
enable_schema = options.session.enable_schema
|
|
97
|
+
default_schema = options.session.default_schema
|
|
98
|
+
if hasattr(self, "_odps_entry"):
|
|
99
|
+
default_schema = default_schema or self._odps_entry.schema
|
|
100
|
+
|
|
101
|
+
# use flags in sql settings
|
|
102
|
+
if sql_settings.get("odps.default.schema"):
|
|
103
|
+
default_schema = sql_settings["odps.default.schema"]
|
|
104
|
+
if str_to_bool(
|
|
105
|
+
sql_settings.get("odps.namespace.schema") or "false"
|
|
106
|
+
) or str_to_bool(
|
|
107
|
+
sql_settings.get("odps.sql.allow.namespace.schema") or "false"
|
|
108
|
+
):
|
|
109
|
+
enable_schema = True
|
|
110
|
+
|
|
111
|
+
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
112
|
+
mf_settings["sql.settings"] = sql_settings
|
|
113
|
+
mf_settings["session.table_lifecycle"] = lifecycle
|
|
114
|
+
mf_settings["session.temp_table_lifecycle"] = temp_lifecycle
|
|
115
|
+
mf_settings["session.quota_name"] = quota_name
|
|
116
|
+
if enable_schema is not None:
|
|
117
|
+
mf_settings["session.enable_schema"] = enable_schema
|
|
118
|
+
if options.session.enable_high_availability is None:
|
|
119
|
+
mf_settings["session.enable_high_availability"] = not in_ipython_frontend()
|
|
120
|
+
mf_settings["session.default_schema"] = default_schema or "default"
|
|
121
|
+
return mf_settings
|
|
122
|
+
|
|
61
123
|
@abc.abstractmethod
|
|
62
124
|
def create_session(self) -> SessionInfo:
|
|
63
125
|
raise NotImplementedError
|
|
@@ -68,7 +130,10 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
68
130
|
|
|
69
131
|
@abc.abstractmethod
|
|
70
132
|
def submit_dag(
|
|
71
|
-
self,
|
|
133
|
+
self,
|
|
134
|
+
dag: TileableGraph,
|
|
135
|
+
managed_input_infos: Dict[str, ResultInfo],
|
|
136
|
+
new_settings: Dict[str, Any] = None,
|
|
72
137
|
) -> DagInfo:
|
|
73
138
|
raise NotImplementedError
|
|
74
139
|
|
|
@@ -84,6 +149,9 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
84
149
|
def decref(self, tileable_keys: List[str]) -> None:
|
|
85
150
|
raise NotImplementedError
|
|
86
151
|
|
|
152
|
+
def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
|
|
153
|
+
return None
|
|
154
|
+
|
|
87
155
|
|
|
88
156
|
class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
89
157
|
_odps_entry: Optional[ODPS]
|
|
@@ -119,6 +187,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
119
187
|
self._tileable_to_infos = weakref.WeakKeyDictionary()
|
|
120
188
|
|
|
121
189
|
self._caller = self._create_caller(odps_entry, address, **kwargs)
|
|
190
|
+
self._last_settings = None
|
|
191
|
+
self._pull_interval = 1 if in_ipython_frontend() else 3
|
|
122
192
|
|
|
123
193
|
@classmethod
|
|
124
194
|
def _create_caller(
|
|
@@ -128,28 +198,32 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
128
198
|
|
|
129
199
|
async def _init(self, _address: str):
|
|
130
200
|
session_info = await self.ensure_async_call(self._caller.create_session)
|
|
201
|
+
self._last_settings = copy.deepcopy(self._caller.get_settings_to_upload())
|
|
131
202
|
self._session_id = session_info.session_id
|
|
203
|
+
await self._show_logview_address()
|
|
132
204
|
|
|
133
|
-
def
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
or t.inputs
|
|
138
|
-
):
|
|
139
|
-
return None
|
|
140
|
-
|
|
141
|
-
schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
|
|
205
|
+
def _upload_and_get_table_read_tileable(
|
|
206
|
+
self, t: TileableType
|
|
207
|
+
) -> Optional[TileableType]:
|
|
208
|
+
table_schema, table_meta = pandas_to_odps_schema(t, unknown_as_string=True)
|
|
142
209
|
if self._odps_entry.exist_table(table_meta.table_name):
|
|
143
|
-
self._odps_entry.delete_table(
|
|
210
|
+
self._odps_entry.delete_table(
|
|
211
|
+
table_meta.table_name, hints=options.sql.settings
|
|
212
|
+
)
|
|
144
213
|
table_name = build_temp_table_name(self.session_id, t.key)
|
|
145
|
-
table_obj = self._odps_entry.create_table(
|
|
214
|
+
table_obj = self._odps_entry.create_table(
|
|
215
|
+
table_name,
|
|
216
|
+
table_schema,
|
|
217
|
+
lifecycle=options.session.temp_table_lifecycle,
|
|
218
|
+
hints=options.sql.settings,
|
|
219
|
+
)
|
|
146
220
|
|
|
147
221
|
data = t.op.get_data()
|
|
148
222
|
batch_size = options.session.upload_batch_size
|
|
149
223
|
|
|
150
224
|
if len(data):
|
|
151
|
-
|
|
152
|
-
with
|
|
225
|
+
table_client = ODPSTableIO(self._odps_entry)
|
|
226
|
+
with table_client.open_writer(table_obj.full_table_name) as writer:
|
|
153
227
|
for batch_start in range(0, len(data), batch_size):
|
|
154
228
|
if isinstance(data, pd.Index):
|
|
155
229
|
batch = data[batch_start : batch_start + batch_size]
|
|
@@ -172,13 +246,35 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
172
246
|
read_tileable.name = t.name
|
|
173
247
|
else: # INDEX_TYPE
|
|
174
248
|
if list(read_tileable.names) != list(t.names):
|
|
175
|
-
read_tileable.names =
|
|
249
|
+
read_tileable.rename(t.names, inplace=True)
|
|
176
250
|
read_tileable._key = t.key
|
|
177
251
|
read_tileable.params = t.params
|
|
178
252
|
return read_tileable.data
|
|
179
253
|
|
|
254
|
+
def _upload_and_get_vol_read_tileable(
|
|
255
|
+
self, t: TileableType
|
|
256
|
+
) -> Optional[TileableType]:
|
|
257
|
+
vol_name = build_session_volume_name(self.session_id)
|
|
258
|
+
writer = ODPSVolumeWriter(self._odps_entry, vol_name, t.key)
|
|
259
|
+
io_handler = get_object_io_handler(t)
|
|
260
|
+
io_handler().write_object(writer, t, t.op.data)
|
|
261
|
+
return build_fetch(t).data
|
|
262
|
+
|
|
263
|
+
def _upload_and_get_read_tileable(self, t: TileableType) -> Optional[TileableType]:
|
|
264
|
+
if (
|
|
265
|
+
not isinstance(t.op, (ArrayDataSource, PandasDataSourceOperator))
|
|
266
|
+
or t.op.get_data() is None
|
|
267
|
+
or t.inputs
|
|
268
|
+
):
|
|
269
|
+
return None
|
|
270
|
+
with sync_pyodps_options():
|
|
271
|
+
if isinstance(t.op, PandasDataSourceOperator):
|
|
272
|
+
return self._upload_and_get_table_read_tileable(t)
|
|
273
|
+
else:
|
|
274
|
+
return self._upload_and_get_vol_read_tileable(t)
|
|
275
|
+
|
|
180
276
|
@enter_mode(kernel=True, build=True)
|
|
181
|
-
def
|
|
277
|
+
def _scan_and_replace_local_sources(
|
|
182
278
|
self, graph: TileableGraph
|
|
183
279
|
) -> Dict[TileableType, TileableType]:
|
|
184
280
|
"""Replaces Pandas data sources with temp table sources in the graph"""
|
|
@@ -199,7 +295,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
199
295
|
|
|
200
296
|
for succ in successors:
|
|
201
297
|
graph.add_edge(replaced, succ)
|
|
202
|
-
succ.
|
|
298
|
+
succ.op._set_inputs([replacements.get(t, t) for t in succ.inputs])
|
|
203
299
|
|
|
204
300
|
graph.results = [replacements.get(t, t) for t in graph.results]
|
|
205
301
|
return replacements
|
|
@@ -207,16 +303,41 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
207
303
|
@enter_mode(kernel=True, build=True)
|
|
208
304
|
def _get_input_infos(self, tileables: List[TileableType]) -> Dict[str, ResultInfo]:
|
|
209
305
|
"""Generate ResultInfo structs from generated temp tables"""
|
|
306
|
+
vol_name = build_session_volume_name(self.session_id)
|
|
307
|
+
|
|
210
308
|
infos = dict()
|
|
211
309
|
for t in tileables:
|
|
212
310
|
key = t.key
|
|
213
|
-
if
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
311
|
+
if isinstance(t.op, DataFrameReadODPSTable):
|
|
312
|
+
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
313
|
+
else:
|
|
314
|
+
if isinstance(t.op, Fetch):
|
|
315
|
+
infos[key] = ODPSVolumeResultInfo(
|
|
316
|
+
volume_name=vol_name, volume_path=t.key
|
|
317
|
+
)
|
|
318
|
+
elif t.inputs and isinstance(t.inputs[0].op, DataFrameReadODPSTable):
|
|
319
|
+
t = t.inputs[0]
|
|
320
|
+
infos[key] = ODPSTableResultInfo(full_table_name=t.op.table_name)
|
|
218
321
|
return infos
|
|
219
322
|
|
|
323
|
+
def _get_diff_settings(self) -> Dict[str, Any]:
|
|
324
|
+
new_settings = self._caller.get_settings_to_upload()
|
|
325
|
+
if not self._last_settings: # pragma: no cover
|
|
326
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
327
|
+
return new_settings
|
|
328
|
+
|
|
329
|
+
update = dict()
|
|
330
|
+
for k in new_settings.keys():
|
|
331
|
+
old_item = self._last_settings.get(k)
|
|
332
|
+
new_item = new_settings.get(k)
|
|
333
|
+
try:
|
|
334
|
+
if old_item != new_item:
|
|
335
|
+
update[k] = new_item
|
|
336
|
+
except: # noqa: E722 # nosec # pylint: disable=bare-except
|
|
337
|
+
update[k] = new_item
|
|
338
|
+
self._last_settings = copy.deepcopy(new_settings)
|
|
339
|
+
return update
|
|
340
|
+
|
|
220
341
|
async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
|
|
221
342
|
tileables = [
|
|
222
343
|
tileable.data if isinstance(tileable, Entity) else tileable
|
|
@@ -226,7 +347,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
226
347
|
tileable_graph, to_execute_tileables = gen_submit_tileable_graph(
|
|
227
348
|
self, tileables, tileable_to_copied
|
|
228
349
|
)
|
|
229
|
-
source_replacements = self.
|
|
350
|
+
source_replacements = self._scan_and_replace_local_sources(tileable_graph)
|
|
230
351
|
|
|
231
352
|
# we need to manage uploaded data sources with refcounting mechanism
|
|
232
353
|
# as nodes in tileable_graph are copied, we need to use original nodes
|
|
@@ -236,9 +357,14 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
236
357
|
|
|
237
358
|
replaced_infos = self._get_input_infos(list(source_replacements.values()))
|
|
238
359
|
dag_info = await self.ensure_async_call(
|
|
239
|
-
self._caller.submit_dag,
|
|
360
|
+
self._caller.submit_dag,
|
|
361
|
+
tileable_graph,
|
|
362
|
+
replaced_infos,
|
|
363
|
+
self._get_diff_settings(),
|
|
240
364
|
)
|
|
241
365
|
|
|
366
|
+
await self._show_logview_address(dag_info.dag_id)
|
|
367
|
+
|
|
242
368
|
progress = Progress()
|
|
243
369
|
profiling = Profiling()
|
|
244
370
|
aio_task = asyncio.create_task(
|
|
@@ -256,25 +382,55 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
256
382
|
self, dag_info: DagInfo, tileables: List, progress: Progress
|
|
257
383
|
):
|
|
258
384
|
start_time = time.time()
|
|
385
|
+
session_id = dag_info.session_id
|
|
259
386
|
dag_id = dag_info.dag_id
|
|
260
|
-
|
|
387
|
+
server_no_response_time = None
|
|
261
388
|
with enter_mode(build=True, kernel=True):
|
|
262
389
|
key_to_tileables = {t.key: t for t in tileables}
|
|
263
|
-
|
|
390
|
+
timeout_val = 0.1
|
|
264
391
|
try:
|
|
265
392
|
while True:
|
|
266
393
|
elapsed_time = time.time() - start_time
|
|
394
|
+
next_timeout_val = min(timeout_val * 2, self._pull_interval)
|
|
267
395
|
timeout_val = (
|
|
268
|
-
min(self.timeout - elapsed_time,
|
|
396
|
+
min(self.timeout - elapsed_time, next_timeout_val)
|
|
269
397
|
if self.timeout
|
|
270
|
-
else
|
|
398
|
+
else next_timeout_val
|
|
271
399
|
)
|
|
272
400
|
if timeout_val <= 0:
|
|
273
401
|
raise TimeoutError("Running DAG timed out")
|
|
274
402
|
|
|
275
|
-
|
|
276
|
-
self.
|
|
277
|
-
|
|
403
|
+
try:
|
|
404
|
+
dag_info: DagInfo = await self.ensure_async_call(
|
|
405
|
+
self._caller.get_dag_info, dag_id
|
|
406
|
+
)
|
|
407
|
+
server_no_response_time = None
|
|
408
|
+
except (NoTaskServerResponseError, SessionAlreadyClosedError) as ex:
|
|
409
|
+
# when we receive SessionAlreadyClosedError after NoTaskServerResponseError
|
|
410
|
+
# is received, it is possible that task server is restarted and
|
|
411
|
+
# SessionAlreadyClosedError might be flaky. Otherwise, the error
|
|
412
|
+
# should be raised.
|
|
413
|
+
if (
|
|
414
|
+
isinstance(ex, SessionAlreadyClosedError)
|
|
415
|
+
and not server_no_response_time
|
|
416
|
+
):
|
|
417
|
+
raise
|
|
418
|
+
server_no_response_time = server_no_response_time or time.time()
|
|
419
|
+
if (
|
|
420
|
+
time.time() - server_no_response_time
|
|
421
|
+
> options.client.task_restart_timeout
|
|
422
|
+
):
|
|
423
|
+
raise MaxFrameError(
|
|
424
|
+
"Failed to get valid response from service. "
|
|
425
|
+
f"Session {self._session_id}."
|
|
426
|
+
) from None
|
|
427
|
+
await asyncio.sleep(timeout_val)
|
|
428
|
+
continue
|
|
429
|
+
|
|
430
|
+
if dag_info is None:
|
|
431
|
+
raise SystemError(
|
|
432
|
+
f"Cannot find DAG with ID {dag_id} in session {session_id}"
|
|
433
|
+
)
|
|
278
434
|
progress.value = dag_info.progress
|
|
279
435
|
if dag_info.status != DagStatus.RUNNING:
|
|
280
436
|
break
|
|
@@ -294,6 +450,8 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
294
450
|
|
|
295
451
|
for key, result_info in dag_info.tileable_to_result_infos.items():
|
|
296
452
|
t = key_to_tileables[key]
|
|
453
|
+
fetcher = get_fetcher_cls(result_info.result_type)(self._odps_entry)
|
|
454
|
+
await fetcher.update_tileable_meta(t, result_info)
|
|
297
455
|
self._tileable_to_infos[t] = result_info
|
|
298
456
|
|
|
299
457
|
def _get_data_tileable_and_indexes(
|
|
@@ -334,7 +492,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
334
492
|
data_tileable, indexes = self._get_data_tileable_and_indexes(tileable)
|
|
335
493
|
info = self._tileable_to_infos[data_tileable]
|
|
336
494
|
fetcher = get_fetcher_cls(info.result_type)(self._odps_entry)
|
|
337
|
-
results.append(await fetcher.fetch(
|
|
495
|
+
results.append(await fetcher.fetch(data_tileable, info, indexes))
|
|
338
496
|
return results
|
|
339
497
|
|
|
340
498
|
async def decref(self, *tileable_keys):
|
|
@@ -388,12 +546,32 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
388
546
|
async def get_mutable_tensor(self, name: str):
|
|
389
547
|
raise NotImplementedError
|
|
390
548
|
|
|
549
|
+
async def get_logview_address(self, hours=None) -> Optional[str]:
|
|
550
|
+
return await self.get_dag_logview_address(None, hours)
|
|
551
|
+
|
|
552
|
+
async def get_dag_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
|
|
553
|
+
return await self.ensure_async_call(
|
|
554
|
+
self._caller.get_logview_address, dag_id, hours
|
|
555
|
+
)
|
|
556
|
+
|
|
557
|
+
async def _show_logview_address(self, dag_id=None, hours=None):
|
|
558
|
+
identity = f"Session ID: {self._session_id}"
|
|
559
|
+
if dag_id:
|
|
560
|
+
identity += f", DAG ID: {dag_id}"
|
|
561
|
+
|
|
562
|
+
logview_addr = await self.get_dag_logview_address(dag_id, hours)
|
|
563
|
+
if logview_addr:
|
|
564
|
+
logger.info("%s, Logview: %s", identity, logview_addr)
|
|
565
|
+
else:
|
|
566
|
+
logger.info("%s, Logview address does not exist", identity)
|
|
567
|
+
|
|
391
568
|
|
|
392
569
|
class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
393
570
|
_client: FrameDriverClient
|
|
394
571
|
_session_id: Optional[str]
|
|
395
572
|
|
|
396
|
-
def __init__(self, client: FrameDriverClient):
|
|
573
|
+
def __init__(self, odps_entry: ODPS, client: FrameDriverClient):
|
|
574
|
+
self._odps_entry = odps_entry
|
|
397
575
|
self._client = client
|
|
398
576
|
self._session_id = None
|
|
399
577
|
|
|
@@ -406,9 +584,14 @@ class MaxFrameRestCaller(MaxFrameServiceCaller):
|
|
|
406
584
|
await self._client.delete_session(self._session_id)
|
|
407
585
|
|
|
408
586
|
async def submit_dag(
|
|
409
|
-
self,
|
|
587
|
+
self,
|
|
588
|
+
dag: TileableGraph,
|
|
589
|
+
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
590
|
+
new_settings: Dict[str, Any] = None,
|
|
410
591
|
) -> DagInfo:
|
|
411
|
-
return await self._client.submit_dag(
|
|
592
|
+
return await self._client.submit_dag(
|
|
593
|
+
self._session_id, dag, managed_input_infos, new_settings=new_settings
|
|
594
|
+
)
|
|
412
595
|
|
|
413
596
|
async def get_dag_info(self, dag_id: str) -> DagInfo:
|
|
414
597
|
return await self._client.get_dag_info(self._session_id, dag_id)
|
|
@@ -446,7 +629,7 @@ class MaxFrameRestSession(MaxFrameSession):
|
|
|
446
629
|
|
|
447
630
|
@classmethod
|
|
448
631
|
def _create_caller(cls, odps_entry: ODPS, address: str, **kwargs):
|
|
449
|
-
return MaxFrameRestCaller(FrameDriverClient(address))
|
|
632
|
+
return MaxFrameRestCaller(odps_entry, FrameDriverClient(address))
|
|
450
633
|
|
|
451
634
|
|
|
452
635
|
def register_session_schemes(overwrite: bool = False):
|