maxframe 1.1.0__cp39-cp39-macosx_10_9_universal2.whl → 1.1.1__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +23 -0
- maxframe/config/tests/test_config.py +20 -1
- maxframe/conftest.py +8 -2
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/dataframe/datasource/read_odps_query.py +2 -1
- maxframe/dataframe/datasource/read_odps_table.py +4 -2
- maxframe/io/odpsio/__init__.py +1 -1
- maxframe/io/odpsio/arrow.py +22 -1
- maxframe/io/odpsio/tableio.py +57 -17
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/serializables/core.py +46 -10
- maxframe/serialization/serializables/tests/test_serializable.py +4 -1
- maxframe/tensor/merge/vstack.py +1 -1
- maxframe/utils.py +6 -0
- {maxframe-1.1.0.dist-info → maxframe-1.1.1.dist-info}/METADATA +3 -3
- {maxframe-1.1.0.dist-info → maxframe-1.1.1.dist-info}/RECORD +25 -25
- {maxframe-1.1.0.dist-info → maxframe-1.1.1.dist-info}/WHEEL +1 -1
- maxframe_client/fetcher.py +5 -6
- maxframe_client/session/odps.py +17 -1
- maxframe_client/session/task.py +4 -1
- maxframe_client/session/tests/test_task.py +35 -2
- {maxframe-1.1.0.dist-info → maxframe-1.1.1.dist-info}/top_level.txt +0 -0
|
Binary file
|
maxframe/config/__init__.py
CHANGED
maxframe/config/config.py
CHANGED
|
@@ -28,6 +28,8 @@ except ImportError:
|
|
|
28
28
|
|
|
29
29
|
available_timezones = lambda: all_timezones
|
|
30
30
|
|
|
31
|
+
import logging
|
|
32
|
+
|
|
31
33
|
from ..utils import get_python_tag
|
|
32
34
|
from .validators import (
|
|
33
35
|
ValidatorType,
|
|
@@ -43,6 +45,8 @@ from .validators import (
|
|
|
43
45
|
is_valid_cache_path,
|
|
44
46
|
)
|
|
45
47
|
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
46
50
|
_DEFAULT_REDIRECT_WARN = "Option {source} has been replaced by {target} and might be removed in a future release."
|
|
47
51
|
_DEFAULT_MAX_ALIVE_SECONDS = 3 * 24 * 3600
|
|
48
52
|
_DEFAULT_MAX_IDLE_SECONDS = 3600
|
|
@@ -499,3 +503,22 @@ class OptionsProxy:
|
|
|
499
503
|
|
|
500
504
|
|
|
501
505
|
options = OptionsProxy()
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def update_wlm_quota_settings(session_id: str, engine_settings: Dict[str, Any]):
|
|
509
|
+
engine_quota = engine_settings.get("odps.task.wlm.quota", None)
|
|
510
|
+
session_quota = options.session.quota_name or None
|
|
511
|
+
if engine_quota != session_quota and engine_quota:
|
|
512
|
+
logger.warning(
|
|
513
|
+
"[Session=%s] Session quota (%s) is different to SubDag engine quota (%s)",
|
|
514
|
+
session_id,
|
|
515
|
+
session_quota,
|
|
516
|
+
engine_quota,
|
|
517
|
+
)
|
|
518
|
+
# TODO(renxiang): overwrite or not overwrite
|
|
519
|
+
return
|
|
520
|
+
|
|
521
|
+
if session_quota:
|
|
522
|
+
engine_settings["odps.task.wlm.quota"] = session_quota
|
|
523
|
+
elif "odps.task.wlm.quota" in engine_settings:
|
|
524
|
+
engine_settings.pop("odps.task.wlm.quota")
|
|
@@ -18,7 +18,14 @@ import threading
|
|
|
18
18
|
|
|
19
19
|
import pytest
|
|
20
20
|
|
|
21
|
-
from ..config import
|
|
21
|
+
from ..config import (
|
|
22
|
+
Config,
|
|
23
|
+
is_integer,
|
|
24
|
+
is_string,
|
|
25
|
+
option_context,
|
|
26
|
+
options,
|
|
27
|
+
update_wlm_quota_settings,
|
|
28
|
+
)
|
|
22
29
|
|
|
23
30
|
|
|
24
31
|
def test_config_context():
|
|
@@ -101,3 +108,15 @@ def test_config_copy():
|
|
|
101
108
|
|
|
102
109
|
target_cfg.update(src_cfg_dict)
|
|
103
110
|
assert target_cfg.a.b.c == 1
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_update_wlm_quota_settings():
|
|
114
|
+
with option_context({}):
|
|
115
|
+
options.session.quota_name = "quota1"
|
|
116
|
+
engine_settings = {}
|
|
117
|
+
update_wlm_quota_settings("session_id", engine_settings)
|
|
118
|
+
assert engine_settings["odps.task.wlm.quota"] == "quota1"
|
|
119
|
+
options.session.quota_name = None
|
|
120
|
+
update_wlm_quota_settings("session_id", engine_settings)
|
|
121
|
+
# TODO(renxiang): overwrite or not overwrite
|
|
122
|
+
assert "odps.task.wlm.quota" in engine_settings
|
maxframe/conftest.py
CHANGED
|
@@ -40,10 +40,14 @@ def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
|
|
|
40
40
|
access_id = test_config.get(section_name, "access_id")
|
|
41
41
|
except NoOptionError:
|
|
42
42
|
access_id = test_config.get("odps", "access_id")
|
|
43
|
+
if not access_id:
|
|
44
|
+
access_id = os.getenv("ACCESS_ID")
|
|
43
45
|
try:
|
|
44
46
|
secret_access_key = test_config.get(section_name, "secret_access_key")
|
|
45
47
|
except NoOptionError:
|
|
46
48
|
secret_access_key = test_config.get("odps", "secret_access_key")
|
|
49
|
+
if not secret_access_key:
|
|
50
|
+
secret_access_key = os.getenv("SECRET_ACCESS_KEY")
|
|
47
51
|
try:
|
|
48
52
|
project = test_config.get(section_name, "project")
|
|
49
53
|
except NoOptionError:
|
|
@@ -119,8 +123,10 @@ def oss_config():
|
|
|
119
123
|
old_cache_url = options.object_cache_url
|
|
120
124
|
|
|
121
125
|
try:
|
|
122
|
-
oss_access_id = config.get("oss", "access_id")
|
|
123
|
-
oss_secret_access_key = config.get("oss", "secret_access_key")
|
|
126
|
+
oss_access_id = config.get("oss", "access_id") or os.getenv("ACCESS_ID")
|
|
127
|
+
oss_secret_access_key = config.get("oss", "secret_access_key") or os.getenv(
|
|
128
|
+
"SECRET_ACCESS_KEY"
|
|
129
|
+
)
|
|
124
130
|
oss_bucket_name = config.get("oss", "bucket_name")
|
|
125
131
|
oss_endpoint = config.get("oss", "endpoint")
|
|
126
132
|
oss_rolearn = config.get("oss", "rolearn")
|
|
Binary file
|
|
@@ -37,6 +37,7 @@ from ...serialization.serializables import (
|
|
|
37
37
|
SeriesField,
|
|
38
38
|
StringField,
|
|
39
39
|
)
|
|
40
|
+
from ...utils import is_empty
|
|
40
41
|
from ..utils import parse_index
|
|
41
42
|
from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
|
|
42
43
|
|
|
@@ -250,7 +251,7 @@ class DataFrameReadODPSQuery(
|
|
|
250
251
|
self.columns = columns
|
|
251
252
|
|
|
252
253
|
def __call__(self, chunk_bytes=None, chunk_size=None):
|
|
253
|
-
if
|
|
254
|
+
if is_empty(self.index_columns):
|
|
254
255
|
index_value = parse_index(pd.RangeIndex(0))
|
|
255
256
|
elif len(self.index_columns) == 1:
|
|
256
257
|
index_value = parse_index(
|
|
@@ -34,6 +34,7 @@ from ...serialization.serializables import (
|
|
|
34
34
|
SeriesField,
|
|
35
35
|
StringField,
|
|
36
36
|
)
|
|
37
|
+
from ...utils import is_empty
|
|
37
38
|
from ..core import DataFrame # noqa: F401
|
|
38
39
|
from ..utils import parse_index
|
|
39
40
|
from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
|
|
@@ -76,7 +77,7 @@ class DataFrameReadODPSTable(
|
|
|
76
77
|
self.columns = columns
|
|
77
78
|
|
|
78
79
|
def __call__(self, shape, chunk_bytes=None, chunk_size=None):
|
|
79
|
-
if
|
|
80
|
+
if is_empty(self.index_columns):
|
|
80
81
|
if np.isnan(shape[0]):
|
|
81
82
|
index_value = parse_index(pd.RangeIndex(0))
|
|
82
83
|
else:
|
|
@@ -238,7 +239,8 @@ def read_odps_table(
|
|
|
238
239
|
partitions = [partitions]
|
|
239
240
|
|
|
240
241
|
append_partitions = append_partitions or any(
|
|
241
|
-
pt.name in (columns
|
|
242
|
+
pt.name in (columns if not is_empty(columns) else ())
|
|
243
|
+
for pt in (table.table_schema.partitions or ())
|
|
242
244
|
)
|
|
243
245
|
op = DataFrameReadODPSTable(
|
|
244
246
|
table_name=table.full_table_name,
|
maxframe/io/odpsio/__init__.py
CHANGED
maxframe/io/odpsio/arrow.py
CHANGED
|
@@ -14,10 +14,12 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Tuple, Union
|
|
16
16
|
|
|
17
|
+
import numpy as np
|
|
17
18
|
import pandas as pd
|
|
18
19
|
import pyarrow as pa
|
|
19
20
|
|
|
20
21
|
from ...core import OutputType
|
|
22
|
+
from ...lib.version import parse as parse_version
|
|
21
23
|
from ...protocol import DataFrameTableMeta
|
|
22
24
|
from ...tensor.core import TENSOR_TYPE
|
|
23
25
|
from ...typing_ import ArrowTableType, PandasObjectTypes
|
|
@@ -109,7 +111,26 @@ def pandas_to_arrow(
|
|
|
109
111
|
df = pd.DataFrame([[df]], columns=names)
|
|
110
112
|
else: # this could never happen # pragma: no cover
|
|
111
113
|
raise ValueError(f"Does not support meta type {table_meta.type!r}")
|
|
112
|
-
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
pa_table = pa.Table.from_pandas(df, nthreads=nthreads, preserve_index=False)
|
|
117
|
+
except pa.ArrowTypeError as ex: # pragma: no cover
|
|
118
|
+
late_np_version = parse_version(np.__version__) >= parse_version("1.20")
|
|
119
|
+
early_pa_version = parse_version(pa.__version__) <= parse_version("4.0")
|
|
120
|
+
if (
|
|
121
|
+
late_np_version
|
|
122
|
+
and early_pa_version
|
|
123
|
+
and "Did not pass numpy.dtype object" in str(ex)
|
|
124
|
+
):
|
|
125
|
+
raise TypeError(
|
|
126
|
+
"Potential dependency conflict. Try update to pyarrow>4.0 "
|
|
127
|
+
"or downgrade to numpy<1.20. Details can be seen at "
|
|
128
|
+
"https://github.com/numpy/numpy/issues/17913. "
|
|
129
|
+
f"Raw error message: {ex!r}"
|
|
130
|
+
).with_traceback(ex.__traceback__) from None
|
|
131
|
+
else:
|
|
132
|
+
raise
|
|
133
|
+
|
|
113
134
|
if table_datetime_cols:
|
|
114
135
|
col_names = pa_table.schema.names
|
|
115
136
|
col_datas = []
|
maxframe/io/odpsio/tableio.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import os
|
|
16
16
|
import time
|
|
17
17
|
from abc import ABC, abstractmethod
|
|
18
|
+
from collections import OrderedDict
|
|
18
19
|
from contextlib import contextmanager
|
|
19
20
|
from typing import Dict, List, Optional, Union
|
|
20
21
|
|
|
@@ -25,7 +26,7 @@ from odps.apis.storage_api import (
|
|
|
25
26
|
TableBatchScanResponse,
|
|
26
27
|
TableBatchWriteResponse,
|
|
27
28
|
)
|
|
28
|
-
from odps.tunnel import TableTunnel
|
|
29
|
+
from odps.tunnel import TableDownloadSession, TableDownloadStatus, TableTunnel
|
|
29
30
|
from odps.types import OdpsSchema, PartitionSpec, timestamp_ntz
|
|
30
31
|
from odps.utils import call_with_retry
|
|
31
32
|
|
|
@@ -36,12 +37,13 @@ except ImportError:
|
|
|
36
37
|
|
|
37
38
|
from ...config import options
|
|
38
39
|
from ...env import ODPS_STORAGE_API_ENDPOINT
|
|
39
|
-
from ...utils import sync_pyodps_options
|
|
40
|
+
from ...utils import is_empty, sync_pyodps_options
|
|
40
41
|
from .schema import odps_schema_to_arrow_schema
|
|
41
42
|
|
|
42
43
|
PartitionsType = Union[List[str], str, None]
|
|
43
44
|
|
|
44
45
|
_DEFAULT_ROW_BATCH_SIZE = 4096
|
|
46
|
+
_DOWNLOAD_ID_CACHE_SIZE = 100
|
|
45
47
|
|
|
46
48
|
|
|
47
49
|
class ODPSTableIO(ABC):
|
|
@@ -65,7 +67,11 @@ class ODPSTableIO(ABC):
|
|
|
65
67
|
) -> OdpsSchema:
|
|
66
68
|
final_cols = []
|
|
67
69
|
|
|
68
|
-
columns =
|
|
70
|
+
columns = (
|
|
71
|
+
columns
|
|
72
|
+
if not is_empty(columns)
|
|
73
|
+
else [col.name for col in table_schema.simple_columns]
|
|
74
|
+
)
|
|
69
75
|
if partition_columns is True:
|
|
70
76
|
partition_columns = [c.name for c in table_schema.partitions]
|
|
71
77
|
else:
|
|
@@ -215,6 +221,46 @@ class TunnelMultiPartitionReader:
|
|
|
215
221
|
|
|
216
222
|
|
|
217
223
|
class TunnelTableIO(ODPSTableIO):
|
|
224
|
+
_down_session_ids = OrderedDict()
|
|
225
|
+
|
|
226
|
+
@classmethod
|
|
227
|
+
def create_download_sessions(
|
|
228
|
+
cls,
|
|
229
|
+
odps_entry: ODPS,
|
|
230
|
+
full_table_name: str,
|
|
231
|
+
partitions: List[Optional[str]] = None,
|
|
232
|
+
) -> Dict[Optional[str], TableDownloadSession]:
|
|
233
|
+
table = odps_entry.get_table(full_table_name)
|
|
234
|
+
tunnel = TableTunnel(odps_entry)
|
|
235
|
+
parts = (
|
|
236
|
+
[partitions]
|
|
237
|
+
if partitions is None or isinstance(partitions, str)
|
|
238
|
+
else partitions
|
|
239
|
+
)
|
|
240
|
+
part_to_session = dict()
|
|
241
|
+
for part in parts:
|
|
242
|
+
part_key = (full_table_name, part)
|
|
243
|
+
down_session = None
|
|
244
|
+
|
|
245
|
+
if part_key in cls._down_session_ids:
|
|
246
|
+
down_id = cls._down_session_ids[part_key]
|
|
247
|
+
down_session = tunnel.create_download_session(
|
|
248
|
+
table, async_mode=True, partition_spec=part, download_id=down_id
|
|
249
|
+
)
|
|
250
|
+
if down_session.status != TableDownloadStatus.Normal:
|
|
251
|
+
down_session = None
|
|
252
|
+
|
|
253
|
+
if down_session is None:
|
|
254
|
+
down_session = tunnel.create_download_session(
|
|
255
|
+
table, async_mode=True, partition_spec=part
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
while len(cls._down_session_ids) >= _DOWNLOAD_ID_CACHE_SIZE:
|
|
259
|
+
cls._down_session_ids.popitem(False)
|
|
260
|
+
cls._down_session_ids[part_key] = down_session.id
|
|
261
|
+
part_to_session[part] = down_session
|
|
262
|
+
return part_to_session
|
|
263
|
+
|
|
218
264
|
@contextmanager
|
|
219
265
|
def open_reader(
|
|
220
266
|
self,
|
|
@@ -241,21 +287,15 @@ class TunnelTableIO(ODPSTableIO):
|
|
|
241
287
|
or (reverse_range and start is None)
|
|
242
288
|
):
|
|
243
289
|
with sync_pyodps_options():
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
290
|
+
tunnel_sessions = self.create_download_sessions(
|
|
291
|
+
self._odps, full_table_name, partitions
|
|
292
|
+
)
|
|
293
|
+
part_to_down_id = {
|
|
294
|
+
pt: session.id for (pt, session) in tunnel_sessions.items()
|
|
295
|
+
}
|
|
296
|
+
total_records = sum(
|
|
297
|
+
session.count for session in tunnel_sessions.values()
|
|
250
298
|
)
|
|
251
|
-
part_to_down_id = dict()
|
|
252
|
-
total_records = 0
|
|
253
|
-
for part in parts:
|
|
254
|
-
down_session = tunnel.create_download_session(
|
|
255
|
-
table, async_mode=True, partition_spec=part
|
|
256
|
-
)
|
|
257
|
-
part_to_down_id[part] = down_session.id
|
|
258
|
-
total_records += down_session.count
|
|
259
299
|
|
|
260
300
|
count = None
|
|
261
301
|
if start is not None or stop is not None:
|
|
Binary file
|
|
Binary file
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import weakref
|
|
16
|
-
from collections import
|
|
16
|
+
from collections import OrderedDict
|
|
17
17
|
from typing import Any, Dict, List, Optional, Tuple, Type
|
|
18
18
|
|
|
19
19
|
import msgpack
|
|
@@ -98,14 +98,18 @@ class SerializableMeta(type):
|
|
|
98
98
|
non_primitive_fields.append(v)
|
|
99
99
|
|
|
100
100
|
# count number of fields for every base class
|
|
101
|
-
cls_to_primitive_field_count =
|
|
102
|
-
cls_to_non_primitive_field_count =
|
|
101
|
+
cls_to_primitive_field_count = OrderedDict()
|
|
102
|
+
cls_to_non_primitive_field_count = OrderedDict()
|
|
103
103
|
for field_name in field_order:
|
|
104
104
|
cls_hash = field_to_cls_hash[field_name]
|
|
105
105
|
if field_name in primitive_field_names:
|
|
106
|
-
cls_to_primitive_field_count[cls_hash]
|
|
106
|
+
cls_to_primitive_field_count[cls_hash] = (
|
|
107
|
+
cls_to_primitive_field_count.get(cls_hash, 0) + 1
|
|
108
|
+
)
|
|
107
109
|
else:
|
|
108
|
-
cls_to_non_primitive_field_count[cls_hash]
|
|
110
|
+
cls_to_non_primitive_field_count[cls_hash] = (
|
|
111
|
+
cls_to_non_primitive_field_count.get(cls_hash, 0) + 1
|
|
112
|
+
)
|
|
109
113
|
|
|
110
114
|
slots = set(properties.pop("__slots__", set()))
|
|
111
115
|
slots.update(properties_field_slot_names)
|
|
@@ -120,9 +124,11 @@ class SerializableMeta(type):
|
|
|
120
124
|
properties["_FIELD_ORDER"] = field_order
|
|
121
125
|
properties["_FIELD_TO_NAME_HASH"] = field_to_cls_hash
|
|
122
126
|
properties["_PRIMITIVE_FIELDS"] = primitive_fields
|
|
123
|
-
properties["_CLS_TO_PRIMITIVE_FIELD_COUNT"] =
|
|
127
|
+
properties["_CLS_TO_PRIMITIVE_FIELD_COUNT"] = OrderedDict(
|
|
128
|
+
cls_to_primitive_field_count
|
|
129
|
+
)
|
|
124
130
|
properties["_NON_PRIMITIVE_FIELDS"] = non_primitive_fields
|
|
125
|
-
properties["_CLS_TO_NON_PRIMITIVE_FIELD_COUNT"] =
|
|
131
|
+
properties["_CLS_TO_NON_PRIMITIVE_FIELD_COUNT"] = OrderedDict(
|
|
126
132
|
cls_to_non_primitive_field_count
|
|
127
133
|
)
|
|
128
134
|
properties["__slots__"] = tuple(slots)
|
|
@@ -296,21 +302,51 @@ class SerializableSerializer(Serializer):
|
|
|
296
302
|
else:
|
|
297
303
|
field.set(obj, value)
|
|
298
304
|
|
|
305
|
+
@classmethod
|
|
306
|
+
def _prune_server_fields(
|
|
307
|
+
cls,
|
|
308
|
+
client_cls_to_field_count: Optional[Dict[int, int]],
|
|
309
|
+
server_cls_to_field_count: Dict[int, int],
|
|
310
|
+
server_fields: list,
|
|
311
|
+
) -> list:
|
|
312
|
+
if not client_cls_to_field_count: # pragma: no cover
|
|
313
|
+
# todo remove this branch when all versions below v0.1.0b5 is eliminated
|
|
314
|
+
return server_fields
|
|
315
|
+
if set(client_cls_to_field_count.keys()) == set(
|
|
316
|
+
server_cls_to_field_count.keys()
|
|
317
|
+
):
|
|
318
|
+
return server_fields
|
|
319
|
+
ret_server_fields = []
|
|
320
|
+
server_pos = 0
|
|
321
|
+
for cls_hash, count in server_cls_to_field_count.items():
|
|
322
|
+
if cls_hash in client_cls_to_field_count:
|
|
323
|
+
ret_server_fields.extend(server_fields[server_pos : server_pos + count])
|
|
324
|
+
server_pos += count
|
|
325
|
+
return ret_server_fields
|
|
326
|
+
|
|
299
327
|
@classmethod
|
|
300
328
|
def _set_field_values(
|
|
301
329
|
cls,
|
|
302
330
|
obj: Serializable,
|
|
303
331
|
values: List[Any],
|
|
304
|
-
client_cls_to_field_count: Optional[Dict[
|
|
332
|
+
client_cls_to_field_count: Optional[Dict[int, int]],
|
|
305
333
|
is_primitive: bool = True,
|
|
306
334
|
):
|
|
307
335
|
obj_class = type(obj)
|
|
308
336
|
if is_primitive:
|
|
309
337
|
server_cls_to_field_count = obj_class._CLS_TO_PRIMITIVE_FIELD_COUNT
|
|
310
|
-
server_fields =
|
|
338
|
+
server_fields = cls._prune_server_fields(
|
|
339
|
+
client_cls_to_field_count,
|
|
340
|
+
server_cls_to_field_count,
|
|
341
|
+
obj_class._PRIMITIVE_FIELDS,
|
|
342
|
+
)
|
|
311
343
|
else:
|
|
312
344
|
server_cls_to_field_count = obj_class._CLS_TO_NON_PRIMITIVE_FIELD_COUNT
|
|
313
|
-
server_fields =
|
|
345
|
+
server_fields = cls._prune_server_fields(
|
|
346
|
+
client_cls_to_field_count,
|
|
347
|
+
server_cls_to_field_count,
|
|
348
|
+
obj_class._NON_PRIMITIVE_FIELDS,
|
|
349
|
+
)
|
|
314
350
|
|
|
315
351
|
legacy_to_new_hash = {
|
|
316
352
|
c._LEGACY_NAME_HASH: c._NAME_HASH
|
|
@@ -221,7 +221,10 @@ def test_compatible_serializable(set_is_ci):
|
|
|
221
221
|
_ref_val = ReferenceField("ref_val", "MySimpleSerializable")
|
|
222
222
|
_dict_val = DictField("dict_val")
|
|
223
223
|
|
|
224
|
-
class
|
|
224
|
+
class MyMidSerializable(MySimpleSerializable):
|
|
225
|
+
_i_bool_val = Int64Field("i_bool_val", default=True)
|
|
226
|
+
|
|
227
|
+
class MySubSerializable(MyMidSerializable):
|
|
225
228
|
_m_int_val = Int64Field("m_int_val", default=250)
|
|
226
229
|
_m_str_val = StringField("m_str_val", default="SUB_STR")
|
|
227
230
|
|
maxframe/tensor/merge/vstack.py
CHANGED
maxframe/utils.py
CHANGED
|
@@ -1127,3 +1127,9 @@ def sync_pyodps_options():
|
|
|
1127
1127
|
|
|
1128
1128
|
def str_to_bool(s: Optional[str]) -> Optional[bool]:
|
|
1129
1129
|
return s.lower().strip() in ("true", "1") if s is not None else None
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def is_empty(val):
|
|
1133
|
+
if isinstance(val, (pd.DataFrame, pd.Series, pd.Index)):
|
|
1134
|
+
return val.empty
|
|
1135
|
+
return not bool(val)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
5
|
Requires-Dist: numpy<2.0.0,>=1.19.0
|
|
6
6
|
Requires-Dist: pandas>=1.0.0
|
|
@@ -11,11 +11,11 @@ Requires-Dist: msgpack>=1.0.0
|
|
|
11
11
|
Requires-Dist: traitlets>=5.0
|
|
12
12
|
Requires-Dist: cloudpickle<3.0.0,>=1.5.0
|
|
13
13
|
Requires-Dist: pyyaml>=5.1
|
|
14
|
+
Requires-Dist: pickle5; python_version < "3.8"
|
|
14
15
|
Requires-Dist: tornado>=6.0
|
|
15
16
|
Requires-Dist: defusedxml>=0.5.0
|
|
16
17
|
Requires-Dist: tqdm>=4.1.0
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
Requires-Dist: pickle5; python_version < "3.8"
|
|
18
|
+
Requires-Dist: importlib_metadata>=1.4
|
|
19
19
|
Provides-Extra: dev
|
|
20
20
|
Requires-Dist: black>=22.3.0; extra == "dev"
|
|
21
21
|
Requires-Dist: flake8>=5.0.4; extra == "dev"
|
|
@@ -1,35 +1,35 @@
|
|
|
1
|
-
maxframe-1.1.
|
|
2
|
-
maxframe-1.1.
|
|
3
|
-
maxframe-1.1.
|
|
4
|
-
maxframe-1.1.
|
|
1
|
+
maxframe-1.1.1.dist-info/RECORD,,
|
|
2
|
+
maxframe-1.1.1.dist-info/WHEEL,sha256=6Jttq6AfcWtuRmLm2_3IJTdjGV1CXy3Nk5dn_Y8g5fU,112
|
|
3
|
+
maxframe-1.1.1.dist-info/top_level.txt,sha256=64x-fc2q59c_vXwNUkehyjF1vb8JWqFSdYmUqIFqoTM,31
|
|
4
|
+
maxframe-1.1.1.dist-info/METADATA,sha256=Uy7LPFzY8VsjVyHUwHA-dqnSN6Jk2NWdWef35C-Vyik,3022
|
|
5
5
|
maxframe_client/conftest.py,sha256=7cwy2sFy5snEaxvtMvxfYFUnG6WtYC_9XxVrwJxOpcU,643
|
|
6
6
|
maxframe_client/__init__.py,sha256=0_6MYIqksNc-B0hORLb0yqNQUhtqdFD7TGg39bQ-_NI,689
|
|
7
|
-
maxframe_client/fetcher.py,sha256=
|
|
7
|
+
maxframe_client/fetcher.py,sha256=6gAKBI23GpkrgKyveHZlABYMWbzgiKwE7W4UWKhrzAI,9172
|
|
8
8
|
maxframe_client/clients/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
9
9
|
maxframe_client/clients/framedriver.py,sha256=-Ux7Q_bWuUSG8r14u84-1UVT9V08q_z4jGxV8kvPQaI,4557
|
|
10
10
|
maxframe_client/tests/test_session.py,sha256=XdbWE3jzmzphsPmbAk5L8xYVUnAXarDDzVhVYIWwnjE,11196
|
|
11
11
|
maxframe_client/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
12
12
|
maxframe_client/tests/test_fetcher.py,sha256=7mVDO456wcUMajguTJ4FWkSpuonLfinFfiz31ZYnHNs,4158
|
|
13
|
-
maxframe_client/session/task.py,sha256=
|
|
13
|
+
maxframe_client/session/task.py,sha256=eL_bQShBAtUttz13JyBbDkMv9UIpnL6wSk9qtwmUNqk,12027
|
|
14
14
|
maxframe_client/session/graph.py,sha256=rRilIWsiVfj_N160s8uv2s7mi_nhx7JxSa9BkhyLRnE,4376
|
|
15
15
|
maxframe_client/session/__init__.py,sha256=KPqhSlAJiuUz8TC-z5o7mHDVXzLSqWwrZ33zNni7piY,832
|
|
16
16
|
maxframe_client/session/consts.py,sha256=kQv67i4wyhV2ZQXwJf_5k4PRXhN811LmYoo2C3NB7tk,1391
|
|
17
|
-
maxframe_client/session/odps.py,sha256=
|
|
18
|
-
maxframe_client/session/tests/test_task.py,sha256=
|
|
17
|
+
maxframe_client/session/odps.py,sha256=8PbU-5jNhFO2psLQ0X_kiiqKcZXwgeshxZkoh6poMvA,24928
|
|
18
|
+
maxframe_client/session/tests/test_task.py,sha256=L1t8IPy9p60vuLgjdQym2ADSa9LI6cQArJ0pnXxtvS0,4701
|
|
19
19
|
maxframe_client/session/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
20
20
|
maxframe/_utils.pyx,sha256=I4kmfhNr-xXK2ak22dr4Vwahzn-JmTaYctbL3y9_UBQ,17017
|
|
21
|
-
maxframe/conftest.py,sha256=
|
|
21
|
+
maxframe/conftest.py,sha256=s0GB7iP-m8UTSrYF0P1TH-kffzhZsch7EaV2KObVZ8k,6293
|
|
22
22
|
maxframe/opcodes.py,sha256=Ef_CJvPvMi_JO4CYQfWRc_eZnMKtoiJWnO7eC-xNAOg,10348
|
|
23
23
|
maxframe/env.py,sha256=_K499f7giN7Iu9f39iI9p_naaEDoJ0rx8dInbzqFOVI,1402
|
|
24
24
|
maxframe/mixin.py,sha256=HBAeWYGb7N6ZIgkA-YpkKiSY1GetcEVNTuMb0ieznBs,3524
|
|
25
25
|
maxframe/protocol.py,sha256=kP8dnBhQEI6BcVFn2uuZZTmvr4xJA-R-SZi9ZJ_iqtY,18984
|
|
26
26
|
maxframe/session.py,sha256=FFciufr4jyKKWBDfbd_OwHGGT8zpRz_8rkZHrGhJMNM,36393
|
|
27
27
|
maxframe/__init__.py,sha256=SqTFS_1o2HDuVY1mhS0ELlqDuM-biwM_MN0EYGkJLf0,1004
|
|
28
|
-
maxframe/utils.py,sha256=
|
|
28
|
+
maxframe/utils.py,sha256=eubuRneXZ_fedIwMDLChgAq0WAXMuFjjyLtc4r_7zjg,34682
|
|
29
29
|
maxframe/extension.py,sha256=F5XTYzW5hNw0AIQz3d6u6Yk7adDdiV4c-HD7bF0X1FI,2659
|
|
30
30
|
maxframe/errors.py,sha256=vHcpVrKRHmoZPa6IwsdDT-jOZUTlhCp8c0e8F2C-5uU,966
|
|
31
31
|
maxframe/udf.py,sha256=HrZzDSNHmv63lCt4bMoPPPVV0HdzIKPL89khmj5yAAc,5157
|
|
32
|
-
maxframe/_utils.cpython-39-darwin.so,sha256=
|
|
32
|
+
maxframe/_utils.cpython-39-darwin.so,sha256=8G3-F5O9zAUqqvl-4AxQ02wyAQvZTIyhhH-lbyZxIpI,846560
|
|
33
33
|
maxframe/typing_.py,sha256=iYzgThxTu38yLRtyH5xFhMrurfFj7awMGytfObhvvcs,1180
|
|
34
34
|
maxframe/codegen.py,sha256=yxrKeRynJiQ3fN88BPFn5oHPKemEpESz20GI8ghDgv8,17733
|
|
35
35
|
maxframe/_utils.pxd,sha256=AhJ4vA_UqZqPshi5nvIZq1xgr80fhIVQ9dm5-UdkYJ8,1154
|
|
@@ -84,10 +84,10 @@ maxframe/dataframe/datasource/from_index.py,sha256=2061zsQn-BhyHTT0X9tE0JK8vLxQU
|
|
|
84
84
|
maxframe/dataframe/datasource/dataframe.py,sha256=LxAKF4gBIHhnJQPuaAUdIEyMAq7HTfiEeNVls5n4I4A,2023
|
|
85
85
|
maxframe/dataframe/datasource/series.py,sha256=QcYiBNcR8jjH6vdO6l6H9F46KHmlBqVCTI2tv9eyZ9w,1909
|
|
86
86
|
maxframe/dataframe/datasource/__init__.py,sha256=C8EKsHTJi-1jvJUKIpZtMtsK-ZID3dtxL1voXnaltTs,640
|
|
87
|
-
maxframe/dataframe/datasource/read_odps_query.py,sha256=
|
|
87
|
+
maxframe/dataframe/datasource/read_odps_query.py,sha256=eOYQdYEIdfQ9SqHkNbY_OxBdUVuH-KZNLDA7fxp8erk,13504
|
|
88
88
|
maxframe/dataframe/datasource/core.py,sha256=ozFmDgw1og7nK9_jU-u3tLEq9pNbitN-8w8XWdbKkJ0,2687
|
|
89
89
|
maxframe/dataframe/datasource/date_range.py,sha256=8JMr_Ife5pKCS_ca7W50Fyoc1JigOJirVzdVaPDzeFo,17227
|
|
90
|
-
maxframe/dataframe/datasource/read_odps_table.py,sha256=
|
|
90
|
+
maxframe/dataframe/datasource/read_odps_table.py,sha256=E2hv-9CxpXlxf0r_lc_G6Cs-voYQ1BunuC9A28K5Sx4,9409
|
|
91
91
|
maxframe/dataframe/datasource/read_parquet.py,sha256=9auOcy8snTxCOohgXZCUXfT_O39irdkBngZH5svgx0E,14531
|
|
92
92
|
maxframe/dataframe/datasource/from_tensor.py,sha256=4viuN5SLLye7Xeb8kouOpm-osoQ2yEovWTDNPQuW8gE,14727
|
|
93
93
|
maxframe/dataframe/datasource/from_records.py,sha256=WBYouYyg7m_8NJdN-yUWSfJlIpm6DVP3IMfLXZFugyI,3442
|
|
@@ -306,7 +306,7 @@ maxframe/core/entity/objects.py,sha256=EnS0F2ageFTEvNfylqp21LTfTv3Q3p0X0Ur6HgWr2
|
|
|
306
306
|
maxframe/core/entity/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
307
307
|
maxframe/core/entity/tests/test_objects.py,sha256=-HS2dux6OdmwtMY6GRxwASq2gPSRY5S9YwTqeG-dGaA,1349
|
|
308
308
|
maxframe/core/graph/__init__.py,sha256=tqUUWDOXp2KFjO7zv1dN_-ttE-ef09-S1GNt8EQ35Bk,765
|
|
309
|
-
maxframe/core/graph/core.cpython-39-darwin.so,sha256=
|
|
309
|
+
maxframe/core/graph/core.cpython-39-darwin.so,sha256=4C80O3QFqwIZEZhYGdPUKwysBU1z1KizXBsUks5lAxA,685456
|
|
310
310
|
maxframe/core/graph/entity.py,sha256=3ifzsEDIxzDjeM9MhlSwgz92GuaEEoCVWxEkEu2xIgE,4863
|
|
311
311
|
maxframe/core/graph/core.pyx,sha256=kyqE5-X9Tc82wU4N_zsf8jNthAHWHTVRNFQWNNbzgpM,15923
|
|
312
312
|
maxframe/core/graph/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
@@ -326,12 +326,12 @@ maxframe/core/operator/objects.py,sha256=lEicZmIsR2sgWsH3oAeeYMde8C0TYeu2vkAyGj8
|
|
|
326
326
|
maxframe/core/operator/base.py,sha256=IXBJ0Nd8JAcnwN8FQ1H-QXiMRKAz2_LUZstku7Msv1E,15336
|
|
327
327
|
maxframe/core/operator/tests/test_core.py,sha256=57aICnc5VLqdVK7icAORTWC81bSjBxeeVWIJcha9J_0,1691
|
|
328
328
|
maxframe/core/operator/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
329
|
-
maxframe/config/config.py,sha256=
|
|
329
|
+
maxframe/config/config.py,sha256=3E7VsRV-IVJ8lu0SDU4NMdizuJBVEu0uh_uUFIFOzBU,16280
|
|
330
330
|
maxframe/config/validators.py,sha256=UjbxMKZcDG98-9uCQESm_V56d-VUD7kQGV0KJghVbj8,2511
|
|
331
|
-
maxframe/config/__init__.py,sha256=
|
|
331
|
+
maxframe/config/__init__.py,sha256=mkW-3nDoNFlRIECv2WbZBv71of2X4KmTUpXK2zuUFjg,683
|
|
332
332
|
maxframe/config/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
333
333
|
maxframe/config/tests/test_validators.py,sha256=U_7yKSl0FdVdDwKU1EsnCzNWaOXi8xrIC08x7hb_O4c,1240
|
|
334
|
-
maxframe/config/tests/test_config.py,sha256=
|
|
334
|
+
maxframe/config/tests/test_config.py,sha256=iddxMP_OfRtcgVtXi42VanLF7CkDy_IwASDVZA9PHds,3283
|
|
335
335
|
maxframe/serialization/exception.py,sha256=2Ubi2ld5XpduqAln26q0T67XLj-OpdwW1Z-nyC_wtCI,2993
|
|
336
336
|
maxframe/serialization/core.pxd,sha256=eBrSXiAPlX83Kbwml5IKJvqsIT03lPCeS6is9HplLiU,1501
|
|
337
337
|
maxframe/serialization/pandas.py,sha256=56qngiE78mwHw7srbDE_rnN1XxgX34B7wdAhDJQRfaE,8488
|
|
@@ -340,22 +340,22 @@ maxframe/serialization/arrow.py,sha256=VnGxNLU9UV_cUPTze43bEFCIbYLAOZnp2pAwVJbAI
|
|
|
340
340
|
maxframe/serialization/__init__.py,sha256=LrwesIKJ6MR_mhxW7qRXJXohH9waubZMR9-YicGDMUs,936
|
|
341
341
|
maxframe/serialization/maxframe_objects.py,sha256=R9WEjbHL0Kr56OGkYDU9fcGi7gII6fGlXhi6IyihTsM,1365
|
|
342
342
|
maxframe/serialization/numpy.py,sha256=8_GSo45l_eNoMn4NAGEb9NLXY_9i4tf9KK4EzG0mKpA,3213
|
|
343
|
-
maxframe/serialization/core.cpython-39-darwin.so,sha256=
|
|
343
|
+
maxframe/serialization/core.cpython-39-darwin.so,sha256=PS7k1GENld0B1hvdOdUpsKENmFpdmCtxFxdr4Erenm0,1178128
|
|
344
344
|
maxframe/serialization/scipy.py,sha256=hP0fAW0di9UgJrGtANB2S8hLDbFBtR8p5NDqAMt5rDI,2427
|
|
345
345
|
maxframe/serialization/core.pyx,sha256=mqfb7YUd-Vop8wmGJTjziDP59YX2tr1iN6puRmFI7dg,35551
|
|
346
346
|
maxframe/serialization/tests/test_serial.py,sha256=fL1ufMU7Lf1fgQ4fwJ0QrKWGQIsw_zHtAQ9zkRfFrOI,12543
|
|
347
347
|
maxframe/serialization/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
348
348
|
maxframe/serialization/serializables/field.py,sha256=atVgX-9rsVG1fTev7vjQArVwIEaCRjXoSEjpQ3mh6bA,16015
|
|
349
349
|
maxframe/serialization/serializables/__init__.py,sha256=_wyFZF5QzSP32wSXlXHEPl98DN658I66WamP8XPJy0c,1351
|
|
350
|
-
maxframe/serialization/serializables/core.py,sha256=
|
|
350
|
+
maxframe/serialization/serializables/core.py,sha256=drZB6nSOi7P-W-bbZpH2PirlSJNreq44PdlKI6qSEFQ,17509
|
|
351
351
|
maxframe/serialization/serializables/field_type.py,sha256=tgaLzbJ9RmzPOkL_iOfl9E8njZ5J7MtRkGLDnY0lRz8,14933
|
|
352
352
|
maxframe/serialization/serializables/tests/test_field_type.py,sha256=T3ebXbUkKveC9Pq1nIl85e4eYascFeJ52d0REHbz5jo,4381
|
|
353
353
|
maxframe/serialization/serializables/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
354
|
-
maxframe/serialization/serializables/tests/test_serializable.py,sha256=
|
|
354
|
+
maxframe/serialization/serializables/tests/test_serializable.py,sha256=wpdqiKnMYpQm0ztbJqMF7-vFnjYe4zWiSMXJnAFGt3I,10266
|
|
355
355
|
maxframe/io/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
|
356
|
-
maxframe/io/odpsio/tableio.py,sha256=
|
|
357
|
-
maxframe/io/odpsio/arrow.py,sha256=
|
|
358
|
-
maxframe/io/odpsio/__init__.py,sha256=
|
|
356
|
+
maxframe/io/odpsio/tableio.py,sha256=Imt5_o53Lg0kqJwv0rdIkIDkhimU4BPYoyHzFKXfTS8,22452
|
|
357
|
+
maxframe/io/odpsio/arrow.py,sha256=Fq5LFyiu8zprCoRs8ITWEn8pIFCZzoYZrL4IDreYURQ,5929
|
|
358
|
+
maxframe/io/odpsio/__init__.py,sha256=GVR_nKbpwG7DTEp2oOoNb2lMWudIJBIAQfdQQ_2Meh4,917
|
|
359
359
|
maxframe/io/odpsio/volumeio.py,sha256=y3JwUgBryoBFrkPppT7hCf6VGyJDADM0MUlBXmcia5w,2948
|
|
360
360
|
maxframe/io/odpsio/schema.py,sha256=U6kHDkRnp3uluMN36ojPQ1SzfiErfdcphJvxUYejQZQ,12980
|
|
361
361
|
maxframe/io/odpsio/tests/test_tableio.py,sha256=2yuQnzmgDZpDlhSrLjOcOWnnnRZrTfYcCMhvCQOoa7Q,5746
|
|
@@ -375,7 +375,7 @@ maxframe/tests/utils.py,sha256=KQu_NR6EQ0dCDlJuaeMFTIzmZjovNS2KrBVED5UVVZo,5343
|
|
|
375
375
|
maxframe/tests/test_codegen.py,sha256=GMrnpSb2eyB_nmuv8-_p47Kw877ElKS3BP52SpqZNIQ,2208
|
|
376
376
|
maxframe/lib/wrapped_pickle.py,sha256=HJCb8ERK6clUVgPe529vduMmbMVqBlrQ3W8mH3tYcaE,3836
|
|
377
377
|
maxframe/lib/version.py,sha256=yQ6HkDOvU9X1rpI49auh-qku2g7gIiztgEH6v1urOrk,18321
|
|
378
|
-
maxframe/lib/mmh3.cpython-39-darwin.so,sha256=
|
|
378
|
+
maxframe/lib/mmh3.cpython-39-darwin.so,sha256=A0-BGi9gNBC2CzLifQiZbMifToeQAHP8drOCWZvEbrQ,119784
|
|
379
379
|
maxframe/lib/compression.py,sha256=k9DSrl_dNBsn5azLjBdL5B4WZ6eNvmCrdMbcF1G7JSc,1442
|
|
380
380
|
maxframe/lib/__init__.py,sha256=CzfbLNqqm1yR1i6fDwCd4h1ptuKVDbURFVCb0ra7QNc,642
|
|
381
381
|
maxframe/lib/mmh3.pyi,sha256=AOp_XqbA5-NwepeeBeG0OFJj5tjEAFLzcViyRNZ0eVI,1494
|
|
@@ -466,7 +466,7 @@ maxframe/tensor/datasource/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQB
|
|
|
466
466
|
maxframe/tensor/rechunk/__init__.py,sha256=pQ4vh6rNzxjIkxrWTxXjAcTDmUw3961-H1f__-lJl6A,797
|
|
467
467
|
maxframe/tensor/rechunk/rechunk.py,sha256=ltvGlUQxzoHSE7bC6J6uQ8sO-YhuFpxmj8-ArJJXIoY,1392
|
|
468
468
|
maxframe/tensor/merge/concatenate.py,sha256=q0qVpizcU7E6op6D54bF4bl2eMLIJOOwb8AkG1jrDCE,3213
|
|
469
|
-
maxframe/tensor/merge/vstack.py,sha256=
|
|
469
|
+
maxframe/tensor/merge/vstack.py,sha256=XGyubfPwkauUQ2zw4D9qd5uLLSRw5LgadHBpEQH71V8,2268
|
|
470
470
|
maxframe/tensor/merge/__init__.py,sha256=NCuoHVwBtVUQnl8-0ph9cT5ARRn3pTdqZt76-XSvpvs,686
|
|
471
471
|
maxframe/tensor/merge/stack.py,sha256=4ZMVqtJQ0nQtbtnJKTX0Z_fSUn8slLLSO56mFT-b0gE,4145
|
|
472
472
|
maxframe/tensor/merge/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
|
maxframe_client/fetcher.py
CHANGED
|
@@ -20,7 +20,6 @@ import pandas as pd
|
|
|
20
20
|
import pyarrow as pa
|
|
21
21
|
from odps import ODPS
|
|
22
22
|
from odps.models import ExternalVolume
|
|
23
|
-
from odps.tunnel import TableTunnel
|
|
24
23
|
|
|
25
24
|
from maxframe.core import OBJECT_TYPE
|
|
26
25
|
from maxframe.dataframe.core import DATAFRAME_TYPE
|
|
@@ -28,6 +27,7 @@ from maxframe.io.objects import get_object_io_handler
|
|
|
28
27
|
from maxframe.io.odpsio import (
|
|
29
28
|
ODPSTableIO,
|
|
30
29
|
ODPSVolumeReader,
|
|
30
|
+
TunnelTableIO,
|
|
31
31
|
arrow_to_pandas,
|
|
32
32
|
build_dataframe_table_meta,
|
|
33
33
|
odps_schema_to_pandas_dtypes,
|
|
@@ -136,11 +136,10 @@ class ODPSTableFetcher(ToThreadMixin, ResultFetcher):
|
|
|
136
136
|
dtypes = odps_schema_to_pandas_dtypes(table.table_schema)
|
|
137
137
|
tileable.refresh_from_dtypes(dtypes)
|
|
138
138
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
total_records += session.count
|
|
139
|
+
part_sessions = TunnelTableIO.create_download_sessions(
|
|
140
|
+
self._odps_entry, info.full_table_name, part_specs
|
|
141
|
+
)
|
|
142
|
+
total_records = sum(session.count for session in part_sessions.values())
|
|
144
143
|
|
|
145
144
|
new_shape_list = list(tileable.shape)
|
|
146
145
|
new_shape_list[0] = total_records
|
maxframe_client/session/odps.py
CHANGED
|
@@ -84,10 +84,21 @@ class MaxFrameServiceCaller(metaclass=abc.ABCMeta):
|
|
|
84
84
|
def get_settings_to_upload(self) -> Dict[str, Any]:
|
|
85
85
|
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
86
86
|
sql_settings.update(options.sql.settings or {})
|
|
87
|
-
|
|
88
87
|
quota_name = options.session.quota_name or getattr(
|
|
89
88
|
odps_options, "quota_name", None
|
|
90
89
|
)
|
|
90
|
+
quota_settings = {
|
|
91
|
+
sql_settings.get("odps.task.wlm.quota", None),
|
|
92
|
+
options.spe.task.settings.get("odps.task.wlm.quota", None),
|
|
93
|
+
options.pythonpack.task.settings.get("odps.task.wlm.quota", None),
|
|
94
|
+
quota_name,
|
|
95
|
+
}.difference([None])
|
|
96
|
+
if len(quota_settings) >= 2:
|
|
97
|
+
raise ValueError(
|
|
98
|
+
"Quota settings are conflicting: %s" % ", ".join(sorted(quota_settings))
|
|
99
|
+
)
|
|
100
|
+
elif len(quota_settings) == 1:
|
|
101
|
+
quota_name = quota_settings.pop()
|
|
91
102
|
lifecycle = options.session.table_lifecycle or odps_options.lifecycle
|
|
92
103
|
temp_lifecycle = (
|
|
93
104
|
options.session.temp_table_lifecycle or odps_options.temp_lifecycle
|
|
@@ -332,6 +343,11 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
|
|
|
332
343
|
self._last_settings = copy.deepcopy(new_settings)
|
|
333
344
|
return new_settings
|
|
334
345
|
|
|
346
|
+
if self._last_settings.get("session.quota_name", None) != new_settings.get(
|
|
347
|
+
"session.quota_name", None
|
|
348
|
+
):
|
|
349
|
+
raise ValueError("Quota name cannot be changed after sessions are created")
|
|
350
|
+
|
|
335
351
|
update = dict()
|
|
336
352
|
for k in new_settings.keys():
|
|
337
353
|
old_item = self._last_settings.get(k)
|
maxframe_client/session/task.py
CHANGED
|
@@ -126,10 +126,13 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
126
126
|
|
|
127
127
|
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
128
128
|
task = MaxFrameTask(name=self._task_name, major_version=self._major_version)
|
|
129
|
+
mf_settings = self.get_settings_to_upload()
|
|
129
130
|
mf_opts = {
|
|
130
|
-
"odps.maxframe.settings": json.dumps(
|
|
131
|
+
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
131
132
|
"odps.maxframe.output_format": self._output_format,
|
|
132
133
|
}
|
|
134
|
+
if mf_settings.get("session.quota_name", None):
|
|
135
|
+
mf_opts["odps.task.wlm.quota"] = mf_settings["session.quota_name"]
|
|
133
136
|
if mf_version:
|
|
134
137
|
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
135
138
|
task.update_settings(mf_opts)
|
|
@@ -11,17 +11,20 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
14
|
import json
|
|
16
15
|
import os
|
|
17
16
|
|
|
18
17
|
import mock
|
|
18
|
+
import pytest
|
|
19
19
|
from defusedxml import ElementTree
|
|
20
20
|
from odps import ODPS
|
|
21
21
|
from odps import options as odps_options
|
|
22
22
|
|
|
23
|
+
from maxframe import options
|
|
24
|
+
from maxframe.config import option_context
|
|
25
|
+
|
|
23
26
|
from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
|
|
24
|
-
from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
|
|
27
|
+
from ...session.task import MaxFrameInstanceCaller, MaxFrameTask, MaxFrameTaskSession
|
|
25
28
|
|
|
26
29
|
expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
|
|
27
30
|
|
|
@@ -79,3 +82,33 @@ def test_maxframe_instance_caller_creating_session():
|
|
|
79
82
|
finally:
|
|
80
83
|
odps_options.priority = old_priority
|
|
81
84
|
odps_options.get_priority = old_get_priority
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@pytest.mark.asyncio
|
|
88
|
+
async def test_session_quota_flag_valid():
|
|
89
|
+
def mock_create(self, task: MaxFrameTask, **kwargs):
|
|
90
|
+
assert task.properties["settings"]
|
|
91
|
+
task_settings = json.loads(task.properties["settings"])
|
|
92
|
+
assert task_settings["odps.task.wlm.quota"] == "session_quota"
|
|
93
|
+
|
|
94
|
+
with mock.patch.multiple(
|
|
95
|
+
target="maxframe_client.session.task.MaxFrameInstanceCaller",
|
|
96
|
+
_wait_instance_task_ready=mock.DEFAULT,
|
|
97
|
+
get_session=mock.DEFAULT,
|
|
98
|
+
get_logview_address=mock.DEFAULT,
|
|
99
|
+
), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
|
|
100
|
+
with option_context({"session.quota_name": "session_quota"}):
|
|
101
|
+
with pytest.raises(ValueError):
|
|
102
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
|
|
103
|
+
await MaxFrameTaskSession.init(
|
|
104
|
+
address="test", odps_entry=ODPS.from_environments()
|
|
105
|
+
)
|
|
106
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota"
|
|
107
|
+
mf_task_session = await MaxFrameTaskSession.init(
|
|
108
|
+
address="test", odps_entry=ODPS.from_environments()
|
|
109
|
+
)
|
|
110
|
+
with pytest.raises(ValueError):
|
|
111
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota2"
|
|
112
|
+
mf_task_session._get_diff_settings()
|
|
113
|
+
options.sql.settings["odps.task.wlm.quota"] = "session_quota"
|
|
114
|
+
mf_task_session._get_diff_settings()
|
|
File without changes
|