maxframe 0.1.0b4__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe_client/clients/framedriver.py
CHANGED
|
@@ -63,9 +63,12 @@ class FrameDriverClient:
|
|
|
63
63
|
session_id: str,
|
|
64
64
|
dag: TileableGraph,
|
|
65
65
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
66
|
+
new_settings: Dict[str, Any] = None,
|
|
66
67
|
) -> DagInfo:
|
|
67
68
|
req_url = f"{self._endpoint}/api/sessions/{session_id}/dags"
|
|
68
|
-
req_body = ExecuteDagRequest(
|
|
69
|
+
req_body = ExecuteDagRequest(
|
|
70
|
+
session_id, dag, managed_input_infos, new_settings=new_settings
|
|
71
|
+
)
|
|
69
72
|
resp = await httpclient.AsyncHTTPClient().fetch(
|
|
70
73
|
req_url,
|
|
71
74
|
method="POST",
|
maxframe_client/fetcher.py
CHANGED
|
@@ -14,16 +14,24 @@
|
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
from numbers import Integral
|
|
17
|
-
from typing import Any, Dict, List, Type, Union
|
|
17
|
+
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
18
18
|
|
|
19
|
+
import pandas as pd
|
|
19
20
|
import pyarrow as pa
|
|
20
21
|
from odps import ODPS
|
|
21
|
-
from odps.models import ExternalVolume
|
|
22
|
-
from
|
|
22
|
+
from odps.models import ExternalVolume
|
|
23
|
+
from odps.tunnel import TableTunnel
|
|
23
24
|
|
|
24
25
|
from maxframe.core import OBJECT_TYPE
|
|
25
|
-
from maxframe.
|
|
26
|
-
from maxframe.
|
|
26
|
+
from maxframe.dataframe.core import DATAFRAME_TYPE
|
|
27
|
+
from maxframe.io.objects import get_object_io_handler
|
|
28
|
+
from maxframe.io.odpsio import (
|
|
29
|
+
ODPSTableIO,
|
|
30
|
+
ODPSVolumeReader,
|
|
31
|
+
arrow_to_pandas,
|
|
32
|
+
build_dataframe_table_meta,
|
|
33
|
+
odps_schema_to_pandas_dtypes,
|
|
34
|
+
)
|
|
27
35
|
from maxframe.protocol import (
|
|
28
36
|
DataFrameTableMeta,
|
|
29
37
|
ODPSTableResultInfo,
|
|
@@ -31,8 +39,9 @@ from maxframe.protocol import (
|
|
|
31
39
|
ResultInfo,
|
|
32
40
|
ResultType,
|
|
33
41
|
)
|
|
42
|
+
from maxframe.tensor.core import TENSOR_TYPE
|
|
34
43
|
from maxframe.typing_ import PandasObjectTypes, TileableType
|
|
35
|
-
from maxframe.utils import ToThreadMixin
|
|
44
|
+
from maxframe.utils import ToThreadMixin, sync_pyodps_options
|
|
36
45
|
|
|
37
46
|
_result_fetchers: Dict[ResultType, Type["ResultFetcher"]] = dict()
|
|
38
47
|
|
|
@@ -52,6 +61,14 @@ class ResultFetcher(ABC):
|
|
|
52
61
|
def __init__(self, odps_entry: ODPS):
|
|
53
62
|
self._odps_entry = odps_entry
|
|
54
63
|
|
|
64
|
+
@abstractmethod
|
|
65
|
+
async def update_tileable_meta(
|
|
66
|
+
self,
|
|
67
|
+
tileable: TileableType,
|
|
68
|
+
info: ResultInfo,
|
|
69
|
+
) -> None:
|
|
70
|
+
raise NotImplementedError
|
|
71
|
+
|
|
55
72
|
@abstractmethod
|
|
56
73
|
async def fetch(
|
|
57
74
|
self,
|
|
@@ -66,6 +83,13 @@ class ResultFetcher(ABC):
|
|
|
66
83
|
class NullFetcher(ResultFetcher):
|
|
67
84
|
result_type = ResultType.NULL
|
|
68
85
|
|
|
86
|
+
async def update_tileable_meta(
|
|
87
|
+
self,
|
|
88
|
+
tileable: TileableType,
|
|
89
|
+
info: ResultInfo,
|
|
90
|
+
) -> None:
|
|
91
|
+
return
|
|
92
|
+
|
|
69
93
|
async def fetch(
|
|
70
94
|
self,
|
|
71
95
|
tileable: TileableType,
|
|
@@ -79,13 +103,79 @@ class NullFetcher(ResultFetcher):
|
|
|
79
103
|
class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
80
104
|
result_type = ResultType.ODPS_TABLE
|
|
81
105
|
|
|
106
|
+
def _get_table_comment(self, table_name: str) -> Optional[str]:
|
|
107
|
+
table = self._odps_entry.get_table(table_name)
|
|
108
|
+
return getattr(table, "comment", None)
|
|
109
|
+
|
|
110
|
+
async def update_tileable_meta(
|
|
111
|
+
self,
|
|
112
|
+
tileable: TileableType,
|
|
113
|
+
info: ODPSTableResultInfo,
|
|
114
|
+
) -> None:
|
|
115
|
+
if (
|
|
116
|
+
isinstance(tileable, DATAFRAME_TYPE)
|
|
117
|
+
and tileable.dtypes is None
|
|
118
|
+
and info.table_meta is not None
|
|
119
|
+
):
|
|
120
|
+
if info.table_meta.pd_column_dtypes is not None:
|
|
121
|
+
tileable.refresh_from_table_meta(info.table_meta)
|
|
122
|
+
else:
|
|
123
|
+
# need to get meta directly from table
|
|
124
|
+
table = self._odps_entry.get_table(info.full_table_name)
|
|
125
|
+
pd_dtypes = odps_schema_to_pandas_dtypes(table.table_schema).drop(
|
|
126
|
+
info.table_meta.table_index_column_names
|
|
127
|
+
)
|
|
128
|
+
tileable.refresh_from_dtypes(pd_dtypes)
|
|
129
|
+
|
|
130
|
+
if tileable.shape and any(pd.isna(x) for x in tileable.shape):
|
|
131
|
+
part_specs = [None] if not info.partition_specs else info.partition_specs
|
|
132
|
+
|
|
133
|
+
with sync_pyodps_options():
|
|
134
|
+
table = self._odps_entry.get_table(info.full_table_name)
|
|
135
|
+
if isinstance(tileable, DATAFRAME_TYPE) and tileable.dtypes is None:
|
|
136
|
+
dtypes = odps_schema_to_pandas_dtypes(table.table_schema)
|
|
137
|
+
tileable.refresh_from_dtypes(dtypes)
|
|
138
|
+
|
|
139
|
+
tunnel = TableTunnel(self._odps_entry)
|
|
140
|
+
total_records = 0
|
|
141
|
+
for part_spec in part_specs:
|
|
142
|
+
session = tunnel.create_download_session(table, part_spec)
|
|
143
|
+
total_records += session.count
|
|
144
|
+
|
|
145
|
+
new_shape_list = list(tileable.shape)
|
|
146
|
+
new_shape_list[0] = total_records
|
|
147
|
+
tileable.params = {"shape": tuple(new_shape_list)}
|
|
148
|
+
|
|
149
|
+
@staticmethod
|
|
150
|
+
def _align_selection_with_shape(
|
|
151
|
+
row_sel: slice, shape: Tuple[Optional[int], ...]
|
|
152
|
+
) -> dict:
|
|
153
|
+
size = shape[0]
|
|
154
|
+
if not row_sel.start and not row_sel.stop:
|
|
155
|
+
return {}
|
|
156
|
+
is_reversed = row_sel.step is not None and row_sel.step < 0
|
|
157
|
+
read_kw = {
|
|
158
|
+
"start": row_sel.start,
|
|
159
|
+
"stop": row_sel.stop,
|
|
160
|
+
"reverse_range": is_reversed,
|
|
161
|
+
}
|
|
162
|
+
if pd.isna(size):
|
|
163
|
+
return read_kw
|
|
164
|
+
|
|
165
|
+
if is_reversed and row_sel.start is not None:
|
|
166
|
+
read_kw["start"] = min(size - 1, row_sel.start)
|
|
167
|
+
if not is_reversed and row_sel.stop is not None:
|
|
168
|
+
read_kw["stop"] = min(size, row_sel.stop)
|
|
169
|
+
return read_kw
|
|
170
|
+
|
|
82
171
|
def _read_single_source(
|
|
83
172
|
self,
|
|
84
173
|
table_meta: DataFrameTableMeta,
|
|
85
174
|
info: ODPSTableResultInfo,
|
|
86
175
|
indexes: List[Union[None, Integral, slice]],
|
|
176
|
+
shape: Tuple[Optional[int], ...],
|
|
87
177
|
):
|
|
88
|
-
table_io =
|
|
178
|
+
table_io = ODPSTableIO(self._odps_entry)
|
|
89
179
|
read_kw = {}
|
|
90
180
|
row_step = None
|
|
91
181
|
if indexes:
|
|
@@ -93,13 +183,8 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
93
183
|
indexes += [None]
|
|
94
184
|
row_sel, col_sel = indexes
|
|
95
185
|
if isinstance(row_sel, slice):
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
read_kw["stop"] = row_sel.stop
|
|
99
|
-
read_kw["reverse_range"] = (
|
|
100
|
-
row_sel.step is not None and row_sel.step < 0
|
|
101
|
-
)
|
|
102
|
-
row_step = row_sel.step
|
|
186
|
+
row_step = row_sel.step
|
|
187
|
+
read_kw = self._align_selection_with_shape(row_sel, shape)
|
|
103
188
|
elif isinstance(row_sel, int):
|
|
104
189
|
read_kw["start"] = row_sel
|
|
105
190
|
read_kw["stop"] = row_sel + 1
|
|
@@ -118,8 +203,8 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
118
203
|
with table_io.open_reader(
|
|
119
204
|
info.full_table_name, info.partition_specs, **read_kw
|
|
120
205
|
) as reader:
|
|
121
|
-
reader_count = reader.count
|
|
122
206
|
result = reader.read_all()
|
|
207
|
+
reader_count = result.num_rows
|
|
123
208
|
|
|
124
209
|
if not row_step:
|
|
125
210
|
return result
|
|
@@ -140,7 +225,7 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
140
225
|
) -> PandasObjectTypes:
|
|
141
226
|
table_meta = build_dataframe_table_meta(tileable)
|
|
142
227
|
arrow_table: pa.Table = await self.to_thread(
|
|
143
|
-
self._read_single_source, table_meta, info, indexes
|
|
228
|
+
self._read_single_source, table_meta, info, indexes, tileable.shape
|
|
144
229
|
)
|
|
145
230
|
return arrow_to_pandas(arrow_table, table_meta)
|
|
146
231
|
|
|
@@ -149,47 +234,31 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
149
234
|
class ODPSVolumeFetcher(ToThreadMixin, ResultFetcher):
|
|
150
235
|
result_type = ResultType.ODPS_VOLUME
|
|
151
236
|
|
|
152
|
-
async def
|
|
153
|
-
self,
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
return await self.to_thread(sync_read)
|
|
160
|
-
|
|
161
|
-
async def _read_external_volume_data(
|
|
162
|
-
self, volume: ExternalVolume, path: str, file_name: str
|
|
163
|
-
) -> bytes:
|
|
164
|
-
signed_url = await self.to_thread(
|
|
165
|
-
volume.get_sign_url, path + "/" + file_name, "GET"
|
|
166
|
-
)
|
|
167
|
-
http_client = httpclient.AsyncHTTPClient()
|
|
168
|
-
|
|
169
|
-
resp = await http_client.fetch(signed_url)
|
|
170
|
-
if hasattr(resp, "status_code") and resp.code >= 400:
|
|
171
|
-
try:
|
|
172
|
-
import oss2.exceptions
|
|
237
|
+
async def update_tileable_meta(
|
|
238
|
+
self,
|
|
239
|
+
tileable: TileableType,
|
|
240
|
+
info: ODPSVolumeResultInfo,
|
|
241
|
+
) -> None:
|
|
242
|
+
return
|
|
173
243
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
244
|
+
async def _fetch_object(
|
|
245
|
+
self,
|
|
246
|
+
tileable: TileableType,
|
|
247
|
+
info: ODPSVolumeResultInfo,
|
|
248
|
+
indexes: List[Union[Integral, slice]],
|
|
249
|
+
) -> Any:
|
|
250
|
+
def volume_fetch_func():
|
|
251
|
+
reader = ODPSVolumeReader(
|
|
252
|
+
self._odps_entry, info.volume_name, info.volume_path
|
|
253
|
+
)
|
|
254
|
+
io_handler = get_object_io_handler(tileable)()
|
|
255
|
+
return io_handler.read_object(reader, tileable, indexes)
|
|
179
256
|
|
|
180
|
-
async def _fetch_object(self, info: ODPSVolumeResultInfo) -> Any:
|
|
181
257
|
volume = await self.to_thread(self._odps_entry.get_volume, info.volume_name)
|
|
182
|
-
if isinstance(volume,
|
|
183
|
-
|
|
184
|
-
volume, info.volume_path, "data"
|
|
185
|
-
)
|
|
186
|
-
elif isinstance(volume, ExternalVolume):
|
|
187
|
-
byte_data = await self._read_external_volume_data(
|
|
188
|
-
volume, info.volume_path, "data"
|
|
189
|
-
)
|
|
258
|
+
if isinstance(volume, ExternalVolume):
|
|
259
|
+
return await self.to_thread(volume_fetch_func)
|
|
190
260
|
else:
|
|
191
261
|
raise NotImplementedError(f"Volume type {type(volume)} not supported")
|
|
192
|
-
return pickle.loads(byte_data)
|
|
193
262
|
|
|
194
263
|
async def fetch(
|
|
195
264
|
self,
|
|
@@ -197,6 +266,6 @@ class ODPSVolumeFetcher(ToThreadMixin, ResultFetcher):
|
|
|
197
266
|
info: ODPSVolumeResultInfo,
|
|
198
267
|
indexes: List[Union[Integral, slice]],
|
|
199
268
|
) -> Any:
|
|
200
|
-
if isinstance(tileable, OBJECT_TYPE):
|
|
201
|
-
return await self._fetch_object(info)
|
|
269
|
+
if isinstance(tileable, (OBJECT_TYPE, TENSOR_TYPE)):
|
|
270
|
+
return await self._fetch_object(tileable, info, indexes)
|
|
202
271
|
raise NotImplementedError(f"Fetching {type(tileable)} not implemented")
|
|
maxframe_client/session/consts.py
CHANGED
|
@@ -12,6 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
# retry consts
|
|
16
|
+
EMPTY_RESPONSE_RETRY_COUNT = 5
|
|
17
|
+
|
|
15
18
|
# Restful Service
|
|
16
19
|
RESTFUL_SESSION_INSECURE_SCHEME = "mf"
|
|
17
20
|
RESTFUL_SESSION_SECURE_SCHEME = "mfs"
|
maxframe_client/session/graph.py
CHANGED
|
@@ -19,10 +19,16 @@ from dataclasses import dataclass
|
|
|
19
19
|
from typing import Any, Dict, List, Tuple, Union
|
|
20
20
|
from weakref import WeakSet
|
|
21
21
|
|
|
22
|
-
from maxframe.core import
|
|
22
|
+
from maxframe.core import (
|
|
23
|
+
ChunkType,
|
|
24
|
+
TileableGraph,
|
|
25
|
+
TileableType,
|
|
26
|
+
build_fetch,
|
|
27
|
+
enter_mode,
|
|
28
|
+
)
|
|
23
29
|
from maxframe.core.operator import Fetch
|
|
24
30
|
from maxframe.session import AbstractSession
|
|
25
|
-
from maxframe.utils import
|
|
31
|
+
from maxframe.utils import copy_tileables
|
|
26
32
|
|
|
27
33
|
logger = logging.getLogger(__name__)
|
|
28
34
|
|