maxframe 0.1.0b2__cp37-cp37m-macosx_10_9_x86_64.whl → 0.1.0b3__cp37-cp37m-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe has been flagged as possibly problematic.

Binary file
maxframe/codegen.py CHANGED
@@ -17,7 +17,7 @@ import base64
17
17
  import dataclasses
18
18
  import logging
19
19
  from enum import Enum
20
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type
20
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
21
21
 
22
22
  from odps.types import OdpsSchema
23
23
  from odps.utils import camel_to_underline
@@ -30,6 +30,7 @@ from .odpsio import build_dataframe_table_meta
30
30
  from .odpsio.schema import pandas_to_odps_schema
31
31
  from .protocol import DataFrameTableMeta, ResultInfo
32
32
  from .serialization import PickleContainer
33
+ from .serialization.serializables import Serializable, StringField
33
34
  from .typing_ import PandasObjectTypes
34
35
  from .udf import MarkedFunction
35
36
 
@@ -48,8 +49,11 @@ class CodeGenResult:
48
49
  constants: Dict[str, Any]
49
50
 
50
51
 
51
- class AbstractUDF(abc.ABC):
52
- _session_id: str
52
+ class AbstractUDF(Serializable):
53
+ _session_id: str = StringField("session_id")
54
+
55
+ def __init__(self, session_id: Optional[str] = None, **kw):
56
+ super().__init__(_session_id=session_id, **kw)
53
57
 
54
58
  @property
55
59
  def name(self) -> str:
@@ -74,7 +78,66 @@ class AbstractUDF(abc.ABC):
74
78
 
75
79
  class UserCodeMixin:
76
80
  @classmethod
77
- def generate_pickled_codes(cls, code_to_pickle: Any) -> List[str]:
81
+ def obj_to_python_expr(cls, obj: Any = None) -> str:
82
+ """
83
+ Parameters
84
+ ----------
85
+ obj
86
+ The object to convert to python expr.
87
+ Returns
88
+ -------
89
+ str :
90
+ A string which, when used directly in generated Python code, evaluates to an object equal to obj.
91
+ """
92
+ if obj is None:
93
+ return "None"
94
+
95
+ if isinstance(obj, (int, float)):
96
+ return repr(obj)
97
+
98
+ if isinstance(obj, bool):
99
+ return "True" if obj else "False"
100
+
101
+ if isinstance(obj, bytes):
102
+ base64_bytes = base64.b64encode(obj)
103
+ return f"base64.b64decode({base64_bytes})"
104
+
105
+ if isinstance(obj, str):
106
+ return repr(obj)
107
+
108
+ if isinstance(obj, list):
109
+ return (
110
+ f"[{', '.join([cls.obj_to_python_expr(element) for element in obj])}]"
111
+ )
112
+
113
+ if isinstance(obj, dict):
114
+ items = (
115
+ f"{repr(key)}: {cls.obj_to_python_expr(value)}"
116
+ for key, value in obj.items()
117
+ )
118
+ return f"{{{', '.join(items)}}}"
119
+
120
+ if isinstance(obj, tuple):
121
+ return f"({', '.join([cls.obj_to_python_expr(sub_obj) for sub_obj in obj])}{',' if len(obj) == 1 else ''})"
122
+
123
+ if isinstance(obj, set):
124
+ return (
125
+ f"{{{', '.join([cls.obj_to_python_expr(sub_obj) for sub_obj in obj])}}}"
126
+ if obj
127
+ else "set()"
128
+ )
129
+
130
+ if isinstance(obj, PickleContainer):
131
+ return UserCodeMixin.generate_pickled_codes(obj, None)
132
+
133
+ raise ValueError(f"not support arg type {type(obj)}")
134
+
135
+ @classmethod
136
+ def generate_pickled_codes(
137
+ cls,
138
+ code_to_pickle: Any,
139
+ unpicked_data_var_name: Union[str, None] = "pickled_data",
140
+ ) -> str:
78
141
  """
79
142
  Generate pickled codes. The final pickled variable is called 'pickled_data'.
80
143
 
@@ -82,20 +145,20 @@ class UserCodeMixin:
82
145
  ----------
83
146
  code_to_pickle: Any
84
147
  The code to be pickled.
148
+ unpicked_data_var_name: str
149
+ The variable name in the generated code that holds the object loaded by cloudpickle.
85
150
 
86
151
  Returns
87
152
  -------
88
- List[str] :
89
- The code snippets of pickling, the final variable is called 'pickled_data'.
153
+ str :
154
+ The generated unpickling code; the resulting variable is named 'pickled_data' by default.
90
155
  """
91
156
  pickled, buffers = cls.dump_pickled_data(code_to_pickle)
92
- pickled = base64.b64encode(pickled)
93
- buffers = [base64.b64encode(b) for b in buffers]
94
- buffers_str = ", ".join(f"base64.b64decode(b'{b.decode()}')" for b in buffers)
95
- return [
96
- f"base64_data = base64.b64decode(b'{pickled.decode()}')",
97
- f"pickled_data = cloudpickle.loads(base64_data, buffers=[{buffers_str}])",
98
- ]
157
+ pickle_loads_expr = f"cloudpickle.loads({cls.obj_to_python_expr(pickled)}, buffers={cls.obj_to_python_expr(buffers)})"
158
+ if unpicked_data_var_name:
159
+ return f"{unpicked_data_var_name} = {pickle_loads_expr}"
160
+
161
+ return pickle_loads_expr
99
162
 
100
163
  @staticmethod
101
164
  def dump_pickled_data(
@@ -114,8 +177,9 @@ class UserCodeMixin:
114
177
 
115
178
 
116
179
  class BigDagCodeContext(metaclass=abc.ABCMeta):
117
- def __init__(self, session_id: str = None):
180
+ def __init__(self, session_id: str = None, subdag_id: str = None):
118
181
  self._session_id = session_id
182
+ self._subdag_id = subdag_id
119
183
  self._tileable_key_to_variables = dict()
120
184
  self.constants = dict()
121
185
  self._data_table_meta_cache = dict()
@@ -142,10 +206,14 @@ class BigDagCodeContext(metaclass=abc.ABCMeta):
142
206
  except KeyError:
143
207
  var_name = self._tileable_key_to_variables[
144
208
  tileable.key
145
- ] = f"var_{self._next_var_id}"
146
- self._next_var_id += 1
209
+ ] = self.next_var_name()
147
210
  return var_name
148
211
 
212
+ def next_var_name(self) -> str:
213
+ var_name = f"var_{self._next_var_id}"
214
+ self._next_var_id += 1
215
+ return var_name
216
+
149
217
  def get_odps_schema(
150
218
  self, data: PandasObjectTypes, unknown_as_string: bool = False
151
219
  ) -> OdpsSchema:
@@ -275,9 +343,10 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
275
343
  engine_priority: int = 0
276
344
  _extension_loaded = False
277
345
 
278
- def __init__(self, session_id: str):
346
+ def __init__(self, session_id: str, subdag_id: str = None):
279
347
  self._session_id = session_id
280
- self._context = self._init_context(session_id)
348
+ self._subdag_id = subdag_id
349
+ self._context = self._init_context(session_id, subdag_id)
281
350
 
282
351
  @classmethod
283
352
  def _load_engine_extensions(cls):
@@ -307,7 +376,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
307
376
  raise NotImplementedError
308
377
 
309
378
  @abc.abstractmethod
310
- def _init_context(self, session_id: str) -> BigDagCodeContext:
379
+ def _init_context(self, session_id: str, subdag_id: str) -> BigDagCodeContext:
311
380
  raise NotImplementedError
312
381
 
313
382
  def _generate_comments(
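
A minimal usage sketch for the codegen changes above (assuming the helpers can be called outside a code-generation session; the expected strings mirror the unit tests added in this release in maxframe/tests/test_codegen.py):

    from maxframe.codegen import UserCodeMixin

    # plain literals are rendered directly as Python source text
    assert UserCodeMixin.obj_to_python_expr({"a": 1, "b": [2, 3]}) == "{'a': 1, 'b': [2, 3]}"
    assert UserCodeMixin.obj_to_python_expr((1,)) == "(1,)"

    # picklable objects become a cloudpickle.loads(...) expression; when a
    # variable name is supplied (default "pickled_data") a full assignment
    # statement is returned instead of a bare expression
    stmt = UserCodeMixin.generate_pickled_codes(lambda x: x + 1)
    # e.g. pickled_data = cloudpickle.loads(base64.b64decode(b'...'), buffers=[])
    print(stmt)
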
maxframe/config/config.py CHANGED
@@ -340,6 +340,12 @@ default_options.register_option(
340
340
  validator=is_integer,
341
341
  remote=True,
342
342
  )
343
+ default_options.register_option(
344
+ "session.subinstance_priority",
345
+ None,
346
+ validator=any_validator(is_null, is_integer),
347
+ remote=True,
348
+ )
343
349
 
344
350
  default_options.register_option("warn_duplicated_execution", False, validator=is_bool)
345
351
  default_options.register_option("dataframe.use_arrow_dtype", True, validator=is_bool)
@@ -66,6 +66,7 @@ class DecrefRunner:
66
66
  if self._decref_thread: # pragma: no branch
67
67
  self._queue.put_nowait((None, None, None))
68
68
  self._decref_thread.join(1)
69
+ self._decref_thread = None
69
70
 
70
71
  def put(self, key: str, session_ref: ref):
71
72
  if self._decref_thread is None:
@@ -15,6 +15,7 @@
15
15
  from typing import Any, Dict
16
16
 
17
17
  from ...serialization.serializables import FieldTypes, ListField
18
+ from ...utils import skip_na_call
18
19
  from .chunks import Chunk, ChunkData
19
20
  from .core import Entity
20
21
  from .executable import _ToObjectMixin
@@ -62,8 +63,8 @@ class ObjectData(TileableData, _ToObjectMixin):
62
63
  _chunks = ListField(
63
64
  "chunks",
64
65
  FieldTypes.reference(ObjectChunkData),
65
- on_serialize=lambda x: [it.data for it in x] if x is not None else x,
66
- on_deserialize=lambda x: [ObjectChunk(it) for it in x] if x is not None else x,
66
+ on_serialize=skip_na_call(lambda x: [it.data for it in x]),
67
+ on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
67
68
  )
68
69
 
69
70
  def __init__(self, op=None, nsplits=None, **kw):
@@ -39,6 +39,7 @@ from .datasource.read_odps_query import read_odps_query
39
39
  from .datasource.read_odps_table import read_odps_table
40
40
  from .datasource.read_parquet import read_parquet
41
41
  from .datastore.to_odps import to_odps_table
42
+ from .groupby import NamedAgg
42
43
  from .initializer import DataFrame, Index, Series, read_pandas
43
44
  from .merge import concat, merge
44
45
  from .misc.cut import cut
@@ -52,7 +53,7 @@ from .reduction import CustomReduction, unique
52
53
  from .tseries.to_datetime import to_datetime
53
54
 
54
55
  try:
55
- from pandas import NA, NamedAgg, Timestamp
56
+ from pandas import NA, Timestamp
56
57
  except ImportError: # pragma: no cover
57
58
  pass
58
59
 
@@ -46,7 +46,7 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
46
46
  r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
47
47
  re.MULTILINE,
48
48
  )
49
- _EXPLAIN_COLUMN_REGEX = re.compile(r"([^ ]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
49
+ _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
50
50
 
51
51
 
52
52
  @dataclasses.dataclass
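
A standalone sketch of why the column pattern was loosened from ([^ ]+) to ([^\(]+): EXPLAIN output may contain column names with spaces (such as the "unnamed: 0" column in the new test data), which the old pattern captured incorrectly.

    import re

    _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")

    text = "id (bigint) AS id_alias\nunnamed: 0 (bigint)\n"
    for match in _EXPLAIN_COLUMN_REGEX.finditer(text):
        # -> ('id', 'bigint', 'id_alias'), then ('unnamed: 0', 'bigint', None);
        # the old ([^ ]+) prefix would have captured just '0' for the second column
        print(match.groups())
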
@@ -69,7 +69,7 @@ class DataFrameReadODPSTable(
69
69
  return getattr(self, "partition_spec", None)
70
70
 
71
71
  def get_columns(self):
72
- return self.columns
72
+ return self.columns or list(self.dtypes.index)
73
73
 
74
74
  def set_pruned_columns(self, columns, *, keep_order=None): # pragma: no cover
75
75
  self.columns = columns
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import os
15
16
  from collections import OrderedDict
16
17
 
17
18
  import numpy as np
@@ -33,6 +34,7 @@ from ..from_tensor import (
33
34
  )
34
35
  from ..index import from_pandas as from_pandas_index
35
36
  from ..index import from_tileable
37
+ from ..read_odps_query import ColumnSchema, _resolve_task_sector
36
38
  from ..series import from_pandas as from_pandas_series
37
39
 
38
40
  ray = lazy_import("ray")
@@ -228,6 +230,7 @@ def test_from_odps_table():
228
230
  assert df.op.table_name == test_table.full_table_name
229
231
  assert df.index_value.name is None
230
232
  assert isinstance(df.index_value.value, IndexValue.RangeIndex)
233
+ assert df.op.get_columns() == ["col1", "col2", "col3"]
231
234
  pd.testing.assert_series_equal(
232
235
  df.dtypes,
233
236
  pd.Series(
@@ -247,6 +250,7 @@ def test_from_odps_table():
247
250
  assert df.op.table_name == test_table.full_table_name
248
251
  assert df.index_value.name is None
249
252
  assert isinstance(df.index_value.value, IndexValue.RangeIndex)
253
+ assert df.op.get_columns() == ["col1", "col2"]
250
254
  pd.testing.assert_series_equal(
251
255
  df.dtypes,
252
256
  pd.Series([np.dtype("O"), np.dtype("int64")], index=["col1", "col2"]),
@@ -257,6 +261,7 @@ def test_from_odps_table():
257
261
  assert df.index_value.name == "col1"
258
262
  assert isinstance(df.index_value.value, IndexValue.Index)
259
263
  assert df.index.dtype == np.dtype("O")
264
+ assert df.op.get_columns() == ["col2", "col3"]
260
265
  pd.testing.assert_series_equal(
261
266
  df.dtypes,
262
267
  pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
@@ -267,6 +272,7 @@ def test_from_odps_table():
267
272
 
268
273
  df = read_odps_table(test_parted_table, append_partitions=True)
269
274
  assert df.op.append_partitions is True
275
+ assert df.op.get_columns() == ["col1", "col2", "col3", "pt"]
270
276
  pd.testing.assert_series_equal(
271
277
  df.dtypes,
272
278
  pd.Series(
@@ -280,6 +286,7 @@ def test_from_odps_table():
280
286
  )
281
287
  assert df.op.append_partitions is True
282
288
  assert df.op.partitions == ["pt=20240103"]
289
+ assert df.op.get_columns() == ["col1", "col2", "pt"]
283
290
  pd.testing.assert_series_equal(
284
291
  df.dtypes,
285
292
  pd.Series(
@@ -377,3 +384,18 @@ def test_date_range():
377
384
  assert dr.index_value.is_unique == expected.is_unique
378
385
  assert dr.index_value.is_monotonic_increasing == expected.is_monotonic_increasing
379
386
  assert dr.name == expected.name
387
+
388
+
389
+ def test_resolve_task_sector():
390
+ input_path = os.path.join(os.path.dirname(__file__), "test-data", "task-input.txt")
391
+ with open(input_path, "r") as f:
392
+ sector = f.read()
393
+ actual_sector = _resolve_task_sector("job0", sector)
394
+
395
+ assert actual_sector.job_name == "job0"
396
+ assert actual_sector.task_name == "M1"
397
+ assert actual_sector.output_target == "Screen"
398
+ assert len(actual_sector.schema) == 78
399
+ assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
400
+ assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
401
+ assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
@@ -14,6 +14,7 @@
14
14
 
15
15
  # noinspection PyUnresolvedReferences
16
16
  from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
17
+ from .core import NamedAgg
17
18
 
18
19
 
19
20
  def _install():
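
A short usage sketch for NamedAgg, which maxframe.dataframe now defines itself (as a plain namedtuple, see the groupby/core.py hunk below) instead of re-exporting it from pandas; named aggregation through groupby(...).agg(**kwargs) is assumed to follow the pandas call convention:

    import maxframe.dataframe as md

    df = md.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    # NamedAgg is namedtuple("NamedAgg", ["column", "aggfunc"]), so it can be
    # built positionally or with keywords, just like pandas.NamedAgg
    result = df.groupby("key").agg(total=md.NamedAgg(column="val", aggfunc="sum"))
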
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from collections import namedtuple
16
+
15
17
  import pandas as pd
16
18
 
17
19
  from ... import opcodes
@@ -30,6 +32,9 @@ _GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)
30
32
  _default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
31
33
 
32
34
 
35
+ NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
36
+
37
+
33
38
  class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
34
39
  _op_type_ = opcodes.GROUPBY
35
40
 
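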
@@ -14,11 +14,14 @@
14
14
 
15
15
  import asyncio
16
16
  import atexit
17
+ import itertools
17
18
  import threading
18
19
  from typing import Dict, Optional
19
20
 
20
21
 
21
22
  class Isolation:
23
+ _counter = itertools.count().__next__
24
+
22
25
  loop: asyncio.AbstractEventLoop
23
26
  _stopped: Optional[asyncio.Event]
24
27
  _thread: Optional[threading.Thread]
@@ -38,7 +41,9 @@ class Isolation:
38
41
 
39
42
  def start(self):
40
43
  if self._threaded:
41
- self._thread = thread = threading.Thread(target=self._run)
44
+ self._thread = thread = threading.Thread(
45
+ name=f"IsolationThread-{self._counter()}", target=self._run
46
+ )
42
47
  thread.daemon = True
43
48
  thread.start()
44
49
  self._thread_ident = thread.ident
Binary file
maxframe/protocol.py CHANGED
@@ -46,6 +46,8 @@ BodyType = TypeVar("BodyType", bound="Serializable")
46
46
 
47
47
 
48
48
  class JsonSerializable(Serializable):
49
+ _ignore_non_existing_keys = True
50
+
49
51
  @classmethod
50
52
  def from_json(cls, serialized: dict) -> "JsonSerializable":
51
53
  raise NotImplementedError
@@ -245,6 +247,8 @@ class DagInfo(JsonSerializable):
245
247
  default_factory=dict,
246
248
  )
247
249
  error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
250
+ start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
251
+ end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
248
252
 
249
253
  @classmethod
250
254
  def from_json(cls, serialized: dict) -> "DagInfo":
@@ -265,7 +269,10 @@ class DagInfo(JsonSerializable):
265
269
  "dag_id": self.dag_id,
266
270
  "status": self.status.value,
267
271
  "progress": self.progress,
272
+ "start_timestamp": self.start_timestamp,
273
+ "end_timestamp": self.end_timestamp,
268
274
  }
275
+ ret = {k: v for k, v in ret.items() if v is not None}
269
276
  if self.tileable_to_result_infos:
270
277
  ret["tileable_to_result_infos"] = {
271
278
  k: v.to_json() for k, v in self.tileable_to_result_infos.items()
@@ -112,6 +112,7 @@ class Serializable(metaclass=SerializableMeta):
112
112
  __slots__ = ("__weakref__",)
113
113
 
114
114
  _cache_primitive_serial = False
115
+ _ignore_non_existing_keys = False
115
116
 
116
117
  _FIELDS: Dict[str, Field]
117
118
  _FIELD_ORDER: List[str]
@@ -128,7 +129,11 @@ class Serializable(metaclass=SerializableMeta):
128
129
  else:
129
130
  values = kwargs
130
131
  for k, v in values.items():
131
- fields[k].set(self, v)
132
+ try:
133
+ fields[k].set(self, v)
134
+ except KeyError:
135
+ if not self._ignore_non_existing_keys:
136
+ raise
132
137
 
133
138
  def __on_deserialize__(self):
134
139
  pass
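
Conceptually, the new _ignore_non_existing_keys switch (enabled on JsonSerializable in protocol.py above) makes protocol objects forward compatible: keys sent by a newer server that the client does not know are silently dropped instead of raising KeyError, which is exactly what the updated DagInfo test exercises by injecting a "non_existing_field" key. A simplified standalone sketch of the constructor behaviour (the real Serializable field machinery is more involved):

    class Base:
        _ignore_non_existing_keys = False
        _fields = frozenset({"session_id"})

        def __init__(self, **kwargs):
            for key, value in kwargs.items():
                if key not in self._fields:
                    if not self._ignore_non_existing_keys:
                        raise KeyError(key)
                    continue          # unknown key: tolerated, simply skipped
                setattr(self, key, value)

    class Tolerant(Base):
        _ignore_non_existing_keys = True

    Tolerant(session_id="s", non_existing_field="x")   # extra key is ignored
    # Base(session_id="s", non_existing_field="x")     # would raise KeyError
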
@@ -507,12 +507,14 @@ class ReferenceField(Field):
507
507
  tag: str,
508
508
  reference_type: Union[str, Type] = None,
509
509
  default: Any = no_default,
510
+ default_factory: Optional[Callable] = None,
510
511
  on_serialize: Callable[[Any], Any] = None,
511
512
  on_deserialize: Callable[[Any], Any] = None,
512
513
  ):
513
514
  super().__init__(
514
515
  tag,
515
516
  default=default,
517
+ default_factory=default_factory,
516
518
  on_serialize=on_serialize,
517
519
  on_deserialize=on_deserialize,
518
520
  )
maxframe/tensor/core.py CHANGED
@@ -43,7 +43,7 @@ from ..serialization.serializables import (
43
43
  StringField,
44
44
  TupleField,
45
45
  )
46
- from ..utils import on_deserialize_shape, on_serialize_shape
46
+ from ..utils import on_deserialize_shape, on_serialize_shape, skip_na_call
47
47
  from .utils import fetch_corner_data, get_chunk_slices
48
48
 
49
49
  logger = logging.getLogger(__name__)
@@ -181,8 +181,8 @@ class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
181
181
  _chunks = ListField(
182
182
  "chunks",
183
183
  FieldTypes.reference(TensorChunkData),
184
- on_serialize=lambda x: [it.data for it in x] if x is not None else x,
185
- on_deserialize=lambda x: [TensorChunk(it) for it in x] if x is not None else x,
184
+ on_serialize=skip_na_call(lambda x: [it.data for it in x]),
185
+ on_deserialize=skip_na_call(lambda x: [TensorChunk(it) for it in x]),
186
186
  )
187
187
 
188
188
  def __init__(
@@ -0,0 +1,69 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import base64
17
+ from typing import List, Tuple
18
+
19
+ # generate unit tests with pytest
20
+ import pytest
21
+
22
+ from maxframe.codegen import UserCodeMixin
23
+ from maxframe.lib import wrapped_pickle
24
+ from maxframe.serialization.core import PickleContainer
25
+
26
+
27
+ @pytest.mark.parametrize(
28
+ "input_obj, expected_output",
29
+ [
30
+ (None, "None"),
31
+ (10, "10"),
32
+ (3.14, "3.14"),
33
+ (True, "True"),
34
+ (False, "False"),
35
+ (b"hello", "base64.b64decode(b'aGVsbG8=')"),
36
+ ("hello", "'hello'"),
37
+ ([1, 2, 3], "[1, 2, 3]"),
38
+ ({"a": 1, "b": 2}, "{'a': 1, 'b': 2}"),
39
+ ((1, 2, 3), "(1, 2, 3)"),
40
+ ((1,), "(1,)"),
41
+ ((), "()"),
42
+ ({1, 2, 3}, "{1, 2, 3}"),
43
+ (set(), "set()"),
44
+ ],
45
+ )
46
+ def test_obj_to_python_expr(input_obj, expected_output):
47
+ assert UserCodeMixin.obj_to_python_expr(input_obj) == expected_output
48
+
49
+
50
+ def test_obj_to_python_expr_custom_object():
51
+ class CustomClass:
52
+ def __init__(self, a: int, b: List[int], c: Tuple[int, int]):
53
+ self.a = a
54
+ self.b = b
55
+ self.c = c
56
+
57
+ custom_obj = CustomClass(1, [2, 3], (4, 5))
58
+ pickle_data = wrapped_pickle.dumps(custom_obj)
59
+ pickle_str = base64.b64encode(pickle_data)
60
+ custom_obj_pickle_container = PickleContainer([pickle_data])
61
+
62
+ # arbitrary class instances are currently not supported
63
+ with pytest.raises(ValueError):
64
+ UserCodeMixin.obj_to_python_expr(custom_obj)
65
+
66
+ assert (
67
+ UserCodeMixin.obj_to_python_expr(custom_obj_pickle_container)
68
+ == f"cloudpickle.loads(base64.b64decode({pickle_str}), buffers=[])"
69
+ )
@@ -11,6 +11,8 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
15
+ import json
14
16
  import time
15
17
 
16
18
  import pytest
@@ -29,28 +31,32 @@ from ..serialization import RemoteException
29
31
  from ..utils import deserialize_serializable, serialize_serializable
30
32
 
31
33
 
34
+ def _json_round_trip(json_data: dict) -> dict:
35
+ return json.loads(json.dumps(json_data))
36
+
37
+
32
38
  def test_result_info_json_serialize():
33
- ri = ResultInfo.from_json(ResultInfo().to_json())
39
+ ri = ResultInfo.from_json(_json_round_trip(ResultInfo().to_json()))
34
40
  assert type(ri) is ResultInfo
35
41
 
36
42
  ri = ODPSTableResultInfo(
37
43
  full_table_name="table_name", partition_specs=["pt=partition"]
38
44
  )
39
- deserial_ri = ResultInfo.from_json(ri.to_json())
45
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
40
46
  assert type(ri) is ODPSTableResultInfo
41
47
  assert ri.result_type == deserial_ri.result_type
42
48
  assert ri.full_table_name == deserial_ri.full_table_name
43
49
  assert ri.partition_specs == deserial_ri.partition_specs
44
50
 
45
51
  ri = ODPSTableResultInfo(full_table_name="table_name")
46
- deserial_ri = ResultInfo.from_json(ri.to_json())
52
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
47
53
  assert type(ri) is ODPSTableResultInfo
48
54
  assert ri.result_type == deserial_ri.result_type
49
55
  assert ri.full_table_name == deserial_ri.full_table_name
50
56
  assert ri.partition_specs == deserial_ri.partition_specs
51
57
 
52
58
  ri = ODPSVolumeResultInfo(volume_name="vol_name", volume_path="vol_path")
53
- deserial_ri = ResultInfo.from_json(ri.to_json())
59
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
54
60
  assert type(ri) is ODPSVolumeResultInfo
55
61
  assert ri.result_type == deserial_ri.result_type
56
62
  assert ri.volume_name == deserial_ri.volume_name
@@ -63,7 +69,7 @@ def test_error_info_json_serialize():
63
69
  except ValueError as ex:
64
70
  err_info = ErrorInfo.from_exception(ex)
65
71
 
66
- deserial_err_info = ErrorInfo.from_json(err_info.to_json())
72
+ deserial_err_info = ErrorInfo.from_json(_json_round_trip(err_info.to_json()))
67
73
  assert deserial_err_info.error_messages == err_info.error_messages
68
74
  assert isinstance(deserial_err_info.raw_error_data, ValueError)
69
75
 
@@ -73,7 +79,7 @@ def test_error_info_json_serialize():
73
79
  with pytest.raises(RemoteException):
74
80
  mf_err_info.reraise()
75
81
 
76
- deserial_err_info = ErrorInfo.from_json(mf_err_info.to_json())
82
+ deserial_err_info = ErrorInfo.from_json(_json_round_trip(mf_err_info.to_json()))
77
83
  assert isinstance(deserial_err_info.raw_error_data, ValueError)
78
84
  with pytest.raises(ValueError):
79
85
  deserial_err_info.reraise()
@@ -94,7 +100,9 @@ def test_dag_info_json_serialize():
94
100
  },
95
101
  error_info=err_info,
96
102
  )
97
- deserial_info = DagInfo.from_json(info.to_json())
103
+ json_info = info.to_json()
104
+ json_info["non_existing_field"] = "non_existing"
105
+ deserial_info = DagInfo.from_json(_json_round_trip(json_info))
98
106
  assert deserial_info.session_id == info.session_id
99
107
  assert deserial_info.dag_id == info.dag_id
100
108
  assert deserial_info.status == info.status
@@ -121,7 +129,7 @@ def test_session_info_json_serialize():
121
129
  idle_timestamp=None,
122
130
  dag_infos={"test_dag_id": dag_info},
123
131
  )
124
- deserial_info = SessionInfo.from_json(info.to_json())
132
+ deserial_info = SessionInfo.from_json(_json_round_trip(info.to_json()))
125
133
  assert deserial_info.session_id == info.session_id
126
134
  assert deserial_info.settings == info.settings
127
135
  assert deserial_info.start_timestamp == info.start_timestamp
maxframe/tests/utils.py CHANGED
@@ -104,6 +104,7 @@ def run_app_in_thread(app_func):
104
104
  q = queue.Queue()
105
105
  exit_event = asyncio.Event(loop=app_loop)
106
106
  app_thread = Thread(
107
+ name="TestAppThread",
107
108
  target=app_thread_func,
108
109
  args=(app_loop, q, exit_event, args, kwargs),
109
110
  )
maxframe/utils.py CHANGED
@@ -338,6 +338,14 @@ def deserialize_serializable(ser_serializable: bytes):
338
338
  return deserialize(header2, buffers2)
339
339
 
340
340
 
341
+ def skip_na_call(func: Callable):
342
+ @functools.wraps(func)
343
+ def new_func(x):
344
+ return func(x) if x is not None else None
345
+
346
+ return new_func
347
+
348
+
341
349
  def url_path_join(*pieces):
342
350
  """Join components of url into a relative url
343
351
 
@@ -450,6 +458,9 @@ _ToThreadRetType = TypeVar("_ToThreadRetType")
450
458
 
451
459
 
452
460
  class ToThreadMixin:
461
+ _thread_pool_size = 1
462
+ _counter = itertools.count().__next__
463
+
453
464
  def __del__(self):
454
465
  if hasattr(self, "_pool"):
455
466
  kw = {"wait": False}
@@ -466,7 +477,10 @@ class ToThreadMixin:
466
477
  **kwargs,
467
478
  ) -> _ToThreadRetType:
468
479
  if not hasattr(self, "_pool"):
469
- self._pool = concurrent.futures.ThreadPoolExecutor(1)
480
+ self._pool = concurrent.futures.ThreadPoolExecutor(
481
+ self._thread_pool_size,
482
+ thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
483
+ )
470
484
 
471
485
  task = asyncio.create_task(
472
486
  to_thread_pool(func, *args, **kwargs, pool=self._pool)
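
The skip_na_call helper added to maxframe/utils.py above simply short-circuits None, which is what lets the on_serialize/on_deserialize lambdas in ObjectData and TensorData drop their inline None checks. A tiny sketch:

    from maxframe.utils import skip_na_call

    to_chunk_data = skip_na_call(lambda chunks: [c.data for c in chunks])
    assert to_chunk_data(None) is None        # None passes straight through
    assert skip_na_call(len)([1, 2, 3]) == 3  # non-None input reaches the callable
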
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 0.1.0b2
3
+ Version: 0.1.0b3
4
4
  Summary: MaxFrame operator-based data analyze framework
5
5
  Requires-Dist: numpy >=1.19.0
6
6
  Requires-Dist: pandas >=1.0.0
@@ -1,41 +1,41 @@
1
- maxframe-0.1.0b2.dist-info/RECORD,,
2
- maxframe-0.1.0b2.dist-info/WHEEL,sha256=HS_2vgrXyXO7N4Q-5VnuMAgG-fG_r8eemMgfU9Aw3Kk,110
3
- maxframe-0.1.0b2.dist-info/top_level.txt,sha256=O_LOO6KS5Y1ZKdmCjA9mzfg0-b1sB_P6Oy-hD8aSDfM,25
4
- maxframe-0.1.0b2.dist-info/METADATA,sha256=NeMSt0fZ-RjNm4v-FwVNTSA1Keglfyg5vWBBMTMCq_0,3043
1
+ maxframe-0.1.0b3.dist-info/RECORD,,
2
+ maxframe-0.1.0b3.dist-info/WHEEL,sha256=HS_2vgrXyXO7N4Q-5VnuMAgG-fG_r8eemMgfU9Aw3Kk,110
3
+ maxframe-0.1.0b3.dist-info/top_level.txt,sha256=O_LOO6KS5Y1ZKdmCjA9mzfg0-b1sB_P6Oy-hD8aSDfM,25
4
+ maxframe-0.1.0b3.dist-info/METADATA,sha256=056zhE5bxFHo_1CAOnpi1A2Ov1JI8hg435HsygF02lA,3043
5
5
  maxframe_client/conftest.py,sha256=7cwy2sFy5snEaxvtMvxfYFUnG6WtYC_9XxVrwJxOpcU,643
6
6
  maxframe_client/__init__.py,sha256=3b-z0oFVVwtIzVFBxOb9pw7gz4IhTSh4FiHtVgnxS4Q,724
7
7
  maxframe_client/fetcher.py,sha256=Ys_qu2qtniXuj9YSfeHvevdrAAEgm8k4YjyoZusdVmg,6813
8
8
  maxframe_client/clients/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
9
9
  maxframe_client/clients/spe.py,sha256=ArZMNQ7olicI4O1JO7CyRP7-hb60DF71ZKCTO0N39uE,3593
10
- maxframe_client/clients/framedriver.py,sha256=NtG3QjZpuMmxZl9IWJRAAzWLojaWn6k8vy0YRTq6hqU,4452
11
- maxframe_client/tests/test_session.py,sha256=Z86Zv2p1ECpiZDNK7YitRXoEIDmhNWbRJB0HSyX462Y,6238
10
+ maxframe_client/clients/framedriver.py,sha256=Rn09529D2qBTgNGc0oCY0l7b3FgzT87TqS1nujGQaHw,4463
11
+ maxframe_client/tests/test_session.py,sha256=75mxU5UTWjb1loQLo9BAkg7BzKuj4vYX49j4vP1-8fA,6307
12
12
  maxframe_client/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
13
13
  maxframe_client/tests/test_fetcher.py,sha256=q7kYCznM6WSxx9TCbHrxs7Zy1L2a5zu9D-Pi1XNgQzg,3516
14
- maxframe_client/session/task.py,sha256=lb_uNH-DKvL2VnGIJC449p5z3zR8eTyQ3B_xjH1MEOY,10842
14
+ maxframe_client/session/task.py,sha256=z5j8qtBM6cs_lZrvfy4Ji3F3sVOhPOCr5r1RsNe7rN4,11102
15
15
  maxframe_client/session/graph.py,sha256=nwILNOIVaIf4E3xWffTAAlRsKRYU_zGW3oVO10du8Xw,4351
16
16
  maxframe_client/session/__init__.py,sha256=KPqhSlAJiuUz8TC-z5o7mHDVXzLSqWwrZ33zNni7piY,832
17
17
  maxframe_client/session/consts.py,sha256=R37BxDF3kgCy0qmDdwLaH5jB7mb7SzfYV6g9yHBKAwk,1344
18
18
  maxframe_client/session/odps.py,sha256=uuPk5rc1OpFk9PNFU1R6r4HQXOHAOU7ZVvbcdGk_N_s,16248
19
- maxframe_client/session/tests/test_task.py,sha256=MvizDb-WXbJn-cZ_e6rozO-VT95Expxu6ngyeD0TyZA,2617
19
+ maxframe_client/session/tests/test_task.py,sha256=lDdw3gToaM3xSaRXEmHUoAo2h0id7t4v_VvpdKxQAao,3279
20
20
  maxframe_client/session/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
21
21
  maxframe/_utils.pyx,sha256=I4kmfhNr-xXK2ak22dr4Vwahzn-JmTaYctbL3y9_UBQ,17017
22
22
  maxframe/conftest.py,sha256=ZzwKhGp7LAVpzQYJkniwIUQqIegcaDQAhyzDyU2qld4,4264
23
23
  maxframe/opcodes.py,sha256=L1_GGenexNnD4dk1ueelgpAebHCC-cY7d--1QPhw8l4,9984
24
24
  maxframe/env.py,sha256=_K499f7giN7Iu9f39iI9p_naaEDoJ0rx8dInbzqFOVI,1402
25
25
  maxframe/mixin.py,sha256=HBAeWYGb7N6ZIgkA-YpkKiSY1GetcEVNTuMb0ieznBs,3524
26
- maxframe/protocol.py,sha256=8nt4L6C8CLiCU214gxKzwb4L_nxW-1vt368afZgQQgs,13822
26
+ maxframe/protocol.py,sha256=LjjE6iw0ZVx82tBMbff4izkGuiJxRG0MTOaPYYpRL10,14190
27
27
  maxframe/session.py,sha256=TspAq0EUhi-7VQb1kAuQyjhpX8vMYqgdk4vzbGCkJcY,35344
28
28
  maxframe/__init__.py,sha256=YGnga-nYEppPoDZoZ5s64PZ0RYLaWtcYtmYSLTjKUBE,976
29
- maxframe/utils.py,sha256=U-MFtfKr0Np1PGsH9YhVomoKS-jzRrFGGSWkx9tYZGs,33555
29
+ maxframe/utils.py,sha256=BrMJLO0-iPKABkNztXWpqiQWHoe-X75HGiCJr5aeles,33914
30
30
  maxframe/extension.py,sha256=4IzF9sPyaRoAzLud0iDLooOgcyq4QunXH0ki3q9Hn8I,2374
31
31
  maxframe/errors.py,sha256=nQZoLGdLJz-Uf9c2vUvQl08QMvRqtkBOhKfdPYRZA4o,690
32
- maxframe/_utils.cpython-37m-darwin.so,sha256=CQ-EQZ-mAN82XmssYNlXXqVCnLeWrXj9joPwcczP7po,488928
32
+ maxframe/_utils.cpython-37m-darwin.so,sha256=OKe22C-cNEsoPdU-Hz69Z7Cqz3b-OWYvEFvTUa5O58g,488936
33
33
  maxframe/udf.py,sha256=tWOMTkNqGWw3ZpNC9wEU0GGNSBV8sV7E-Ye80DI28eg,2241
34
34
  maxframe/typing_.py,sha256=fzHETru3IOZAJwU9I7n_ib3wHuQRJ9XFVmAk7WpqkMo,1096
35
- maxframe/codegen.py,sha256=mqMIphJFN8r5jtiPrs51QuoBiZoOuYsHBcEeKnUBb-Y,13606
35
+ maxframe/codegen.py,sha256=pSiGHoEo9YajFPHbFHvi7fGkiJmAQdBCe0mMXNOG6-U,15846
36
36
  maxframe/_utils.pxd,sha256=AhJ4vA_UqZqPshi5nvIZq1xgr80fhIVQ9dm5-UdkYJ8,1154
37
37
  maxframe/dataframe/arrays.py,sha256=RWzimUcrds5CsIlPausfJAkLUjcktBSSXwdXyUNKEtU,28752
38
- maxframe/dataframe/__init__.py,sha256=CttHAoUmkUi0o4qujJ29QwHdg2PKjcOGBT_D9eWkSIs,2092
38
+ maxframe/dataframe/__init__.py,sha256=tpbt4OgW4hoU_1OsQu1WSxbpZD4YPXW_oT4xOxNPNqE,2112
39
39
  maxframe/dataframe/core.py,sha256=mJ5I_qBP8HDAzlwPnkj8RN4EyBHC8d5unbe2LC6HKXg,73641
40
40
  maxframe/dataframe/initializer.py,sha256=4BpZJB8bbyFnABUYWBrk_qzzrogEsWgFuU21Ma9IsjY,10264
41
41
  maxframe/dataframe/utils.py,sha256=qWRo51rcMTlo4mvZ8ZZq1zIF9CiAgU1qRtoCAaYrR34,44111
@@ -83,14 +83,14 @@ maxframe/dataframe/datasource/from_index.py,sha256=2061zsQn-BhyHTT0X9tE0JK8vLxQU
83
83
  maxframe/dataframe/datasource/dataframe.py,sha256=LxAKF4gBIHhnJQPuaAUdIEyMAq7HTfiEeNVls5n4I4A,2023
84
84
  maxframe/dataframe/datasource/series.py,sha256=QcYiBNcR8jjH6vdO6l6H9F46KHmlBqVCTI2tv9eyZ9w,1909
85
85
  maxframe/dataframe/datasource/__init__.py,sha256=C8EKsHTJi-1jvJUKIpZtMtsK-ZID3dtxL1voXnaltTs,640
86
- maxframe/dataframe/datasource/read_odps_query.py,sha256=t-kALzm2OXRHChCwRiTWPIp41o-q_v2xHNR9W6nm6Yk,9820
86
+ maxframe/dataframe/datasource/read_odps_query.py,sha256=nj8l38S0iVAXgNXgzDFERO-HNp6lJ1GahImwDpEzXXw,9821
87
87
  maxframe/dataframe/datasource/core.py,sha256=ozFmDgw1og7nK9_jU-u3tLEq9pNbitN-8w8XWdbKkJ0,2687
88
88
  maxframe/dataframe/datasource/date_range.py,sha256=CDGpxDyjLwnb66j-MIiiTfXGXHGh5MLhEmj6x2riIlU,17244
89
- maxframe/dataframe/datasource/read_odps_table.py,sha256=2FIgD2N6IOFj9IbpwOCX_XYOmFpOugLY2Byl6-jtVsI,9018
89
+ maxframe/dataframe/datasource/read_odps_table.py,sha256=VCqWxJswcjujVoUNXb2kbTkOZroMkFCg5n272Yn7ME4,9045
90
90
  maxframe/dataframe/datasource/read_parquet.py,sha256=9auOcy8snTxCOohgXZCUXfT_O39irdkBngZH5svgx0E,14531
91
91
  maxframe/dataframe/datasource/from_tensor.py,sha256=4viuN5SLLye7Xeb8kouOpm-osoQ2yEovWTDNPQuW8gE,14727
92
92
  maxframe/dataframe/datasource/from_records.py,sha256=WBYouYyg7m_8NJdN-yUWSfJlIpm6DVP3IMfLXZFugyI,3442
93
- maxframe/dataframe/datasource/tests/test_datasource.py,sha256=iSAg0W9lme45uBSOR77j0dR7dSevyrHpkM2VVONZGyU,13620
93
+ maxframe/dataframe/datasource/tests/test_datasource.py,sha256=4O3N-XD-MpJxEQfILu4cS7gU82hqgS9g9gnDDEsw56k,14640
94
94
  maxframe/dataframe/datasource/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
95
95
  maxframe/dataframe/sort/sort_index.py,sha256=Hwbkm9x8kqGgXh4gMcAtYMDjYtt-S3CJXfYR9pN5Iqk,5412
96
96
  maxframe/dataframe/sort/__init__.py,sha256=Vt2Ynr7uAX51hLbQu93QeHiFH4D9_WJMO99KljpbO2U,1160
@@ -128,9 +128,9 @@ maxframe/dataframe/extensions/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0
128
128
  maxframe/dataframe/groupby/aggregation.py,sha256=BuhqZal6RLHkjvwJep86oT1a7rMqxxUAPxQ_dao6I6E,11953
129
129
  maxframe/dataframe/groupby/fill.py,sha256=JF3NxyXigZqg8ecKtLSndDmNMX8S6g_ChQR9JAK036E,4721
130
130
  maxframe/dataframe/groupby/cum.py,sha256=jdGQm7U-GosgHMfneHZC5Z2uraj6iBmSFOhqP3m18B0,3755
131
- maxframe/dataframe/groupby/__init__.py,sha256=TNb4WhAV0Q9d65bKMo_n3-bkPhAMj2YA6ZHalPUUeoU,3334
131
+ maxframe/dataframe/groupby/__init__.py,sha256=nZkz1OAdYRj8qwQkUAZDax0pfCsUH_fprwuksS97vuc,3361
132
132
  maxframe/dataframe/groupby/getitem.py,sha256=kUcI9oIrjOcAHnho96Le9yEJxFydALsWbGpZfTtF8gY,3252
133
- maxframe/dataframe/groupby/core.py,sha256=AHO_6tX0fL1YkXLXUm2FjhPvxnmfmj-lkx711Uk5TPM,5980
133
+ maxframe/dataframe/groupby/core.py,sha256=K1hg9jod6z3C65SYoidmEAd_k0Mear4l5IQuwNMjpxQ,6075
134
134
  maxframe/dataframe/groupby/transform.py,sha256=pY3WPA4gN8piYSTInncjnRdh8mi9FDQa00A-Pyaoil4,8586
135
135
  maxframe/dataframe/groupby/head.py,sha256=ZDkbSn3HuUR4GGkZJqo_fL-6KFJfs55aKXQkAh_0wvA,3266
136
136
  maxframe/dataframe/groupby/sample.py,sha256=IdoyzT-V5309txYvM_iaYKupsULfozMGwm1K3oihTf4,6935
@@ -267,13 +267,13 @@ maxframe/core/entity/tileables.py,sha256=b9jn_OQ-FQkbw7E7jMLjoJ4-VR7tBS8Mbx_j4iZ
267
267
  maxframe/core/entity/__init__.py,sha256=Hz_p6eTkrSdkT7YCo5aeGZ33tms5wwifMp4TeYsAVlw,1292
268
268
  maxframe/core/entity/core.py,sha256=t7Ex9Yb7A1h_XwyRG88Fx4ZOai-NQKi2luRVS_jFPEo,4018
269
269
  maxframe/core/entity/utils.py,sha256=IuNgFmBQFRioAA1hgZe6nTEggOmDY-iooZqncQQrV28,942
270
- maxframe/core/entity/executable.py,sha256=mkG6Ao6f4jAPhgGlPg4KOCwXIhdEZ5I69AEf3VF2iGQ,10903
270
+ maxframe/core/entity/executable.py,sha256=HKXHXdPIyxg9i-OWmJxIY3KfXwX0x3xN9QcR5Xhc7dQ,10938
271
271
  maxframe/core/entity/output_types.py,sha256=uqApvFK8w6_aMxRets69dTwD1ndBDgVgqDCflyt9ubg,2645
272
- maxframe/core/entity/objects.py,sha256=3bhwSSzVD0o_UEzsXie-SuCie9y_mma9VDblpy3XmG8,3020
272
+ maxframe/core/entity/objects.py,sha256=RMHLTGbIHZNxxX59lAuQydAKcR32qKleIYUqdElGS4E,3034
273
273
  maxframe/core/entity/fuse.py,sha256=47U6MHRzA2ZvUi-kJb7b3mC_gN07x3yebBgX2Jj7VZo,2277
274
274
  maxframe/core/entity/chunks.py,sha256=yNSLCWOpA_Z6aGr6ZI32dIJf3xPdRBWbvdsl8sTM3BE,2134
275
275
  maxframe/core/graph/__init__.py,sha256=rnsXwW0ouh1f7SVtq73-PzLE-MBM6Op_0l6J7b7wGRE,821
276
- maxframe/core/graph/core.cpython-37m-darwin.so,sha256=rPzFfp9ThfdOtXdwmrMtxbGYf4mITQ7zLuRGh5hLt-I,361144
276
+ maxframe/core/graph/core.cpython-37m-darwin.so,sha256=BYD3I1sY48Dxnav_7JJF952cocb1923gQftQqN4t3-c,361120
277
277
  maxframe/core/graph/entity.py,sha256=56gjXyDXN-TTPm3AQOxuRVQbb_fguKFDL_Xm7i95XEk,5559
278
278
  maxframe/core/graph/core.pyx,sha256=ZJPx_MTOBMaX-6mns6tAiu-wrIBvRAKN44YAGTypJ1Y,15887
279
279
  maxframe/core/graph/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
@@ -294,7 +294,7 @@ maxframe/core/operator/fuse.py,sha256=0RGemF99gQCwV4aEk-K6T5KAGToO-487dFk8LyYDIZ
294
294
  maxframe/core/operator/base.py,sha256=nxuSKjbBzDrItM9PGmFo8RLwParazu525jMLWj0kXkM,15251
295
295
  maxframe/core/operator/tests/test_core.py,sha256=57aICnc5VLqdVK7icAORTWC81bSjBxeeVWIJcha9J_0,1691
296
296
  maxframe/core/operator/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
297
- maxframe/config/config.py,sha256=IqySITkIuIs0Gb6VME55Ox0WXOOT5yFwacC1gWLS83I,12897
297
+ maxframe/config/config.py,sha256=3lQj99eMGg9MBW1gMJAdGFD88UEcdZf71sHgAHXASAk,13045
298
298
  maxframe/config/validators.py,sha256=2m9MrkjDUFiU4PPaWIw8tjwMaOy8AYmuJFqVnnY8IMY,1615
299
299
  maxframe/config/__init__.py,sha256=g5lN3nP2HTAXa6ExGxU1NwU1M9ulYPmAcsV-gU7nIW8,656
300
300
  maxframe/config/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
@@ -307,14 +307,14 @@ maxframe/serialization/arrow.py,sha256=VnGxNLU9UV_cUPTze43bEFCIbYLAOZnp2pAwVJbAI
307
307
  maxframe/serialization/__init__.py,sha256=9eSnoDww1uw2DAXEBBTB2atJQHzd-38XVxrCkoaypxA,921
308
308
  maxframe/serialization/maxframe_objects.py,sha256=R9WEjbHL0Kr56OGkYDU9fcGi7gII6fGlXhi6IyihTsM,1365
309
309
  maxframe/serialization/numpy.py,sha256=8_GSo45l_eNoMn4NAGEb9NLXY_9i4tf9KK4EzG0mKpA,3213
310
- maxframe/serialization/core.cpython-37m-darwin.so,sha256=0pF_PpwHPD_wx-TD5o4IOEwYPRymX3nvK-INeEwEXkI,695456
310
+ maxframe/serialization/core.cpython-37m-darwin.so,sha256=XN9VpQtHGwy82OmscDwRd3LrTrmusQaQ4PBOohTCFM0,695464
311
311
  maxframe/serialization/scipy.py,sha256=hP0fAW0di9UgJrGtANB2S8hLDbFBtR8p5NDqAMt5rDI,2427
312
312
  maxframe/serialization/core.pyx,sha256=AATN47RdBTq2zg7--3xX2VHyAZSvoAuYRt7B7gEgKPE,33984
313
313
  maxframe/serialization/tests/test_serial.py,sha256=Wj_I6CBQMaOtE8WtqdUaBoU8FhBOihht6SfeHOJV-zU,12511
314
314
  maxframe/serialization/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
315
- maxframe/serialization/serializables/field.py,sha256=OjQyF667xANnNTVpQ34etd2LTNZrNUW_1K5lYuVWI1U,15918
315
+ maxframe/serialization/serializables/field.py,sha256=atVgX-9rsVG1fTev7vjQArVwIEaCRjXoSEjpQ3mh6bA,16015
316
316
  maxframe/serialization/serializables/__init__.py,sha256=_wyFZF5QzSP32wSXlXHEPl98DN658I66WamP8XPJy0c,1351
317
- maxframe/serialization/serializables/core.py,sha256=a_6_UgomggS5ty5LXycnAphjFiqA0nEqgNUOQGN04pk,8796
317
+ maxframe/serialization/serializables/core.py,sha256=xlqVUlBK3aLTavHLWHg4JXUTaBGzSuM7t-XHahB8et4,8965
318
318
  maxframe/serialization/serializables/field_type.py,sha256=Feh09hu8XyaxS5MaJ4za_pcvqJVuMkOeGxwQ9OuJw6I,14865
319
319
  maxframe/serialization/serializables/tests/test_field_type.py,sha256=T3ebXbUkKveC9Pq1nIl85e4eYascFeJ52d0REHbz5jo,4381
320
320
  maxframe/serialization/serializables/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
@@ -330,14 +330,15 @@ maxframe/odpsio/tests/test_schema.py,sha256=yss1ly55ErYse95XMFq2s_GWL8UnwZga5RyN
330
330
  maxframe/odpsio/tests/test_arrow.py,sha256=SQ9EmI9_VOOC8u6Rg6nh3IPC2fPbLvJ9HwtpMNDRhL8,3106
331
331
  maxframe/odpsio/tests/test_volumeio.py,sha256=UEqFANuPKyFtlIh2JNi-LoixH52bxsgHdxu3himnEvs,3022
332
332
  maxframe/tests/test_utils.py,sha256=xaAoURr5NOJUTY0XVa2H8qOStcEH5UQSXItkatHFxFE,11977
333
- maxframe/tests/test_protocol.py,sha256=GnA-czIKHvNqZCBbgnWQZ1CP06_SorTEvGZ9VHUGvK4,4857
333
+ maxframe/tests/test_protocol.py,sha256=t11yxh4_gWxxCuk09zo3pn9Nn96DBBQTBt12ewKDwLQ,5187
334
334
  maxframe/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
335
- maxframe/tests/utils.py,sha256=DHpzJLs-U6xiv5jSHe0m1D2b5bk7WLA7fa2-PY4HpOg,4534
335
+ maxframe/tests/utils.py,sha256=wJtSFXt3BD4i5zdO4JBQk_kNAxrtyGLro0jodCA4xuY,4568
336
+ maxframe/tests/test_codegen.py,sha256=GMrnpSb2eyB_nmuv8-_p47Kw877ElKS3BP52SpqZNIQ,2208
336
337
  maxframe/lib/wrapped_pickle.py,sha256=xJa0wI-GsBZFKQpVnlh_hZBlQ2u1D8VO2aBIW7VOdP4,3810
337
338
  maxframe/lib/version.py,sha256=yQ6HkDOvU9X1rpI49auh-qku2g7gIiztgEH6v1urOrk,18321
338
339
  maxframe/lib/compression.py,sha256=k9DSrl_dNBsn5azLjBdL5B4WZ6eNvmCrdMbcF1G7JSc,1442
339
340
  maxframe/lib/__init__.py,sha256=CzfbLNqqm1yR1i6fDwCd4h1ptuKVDbURFVCb0ra7QNc,642
340
- maxframe/lib/mmh3.cpython-37m-darwin.so,sha256=x5pjOTvSfsLeduNnNaJNhZ2a6LkNiU51loSeFgp7nt8,37904
341
+ maxframe/lib/mmh3.cpython-37m-darwin.so,sha256=WAmuMKlVy42_JaGyRDqYEHrWftb3Uk48e2uaDTK8ugE,37904
341
342
  maxframe/lib/functools_compat.py,sha256=PMSkct9GIbzq-aBwTnggrOLNfLh4xQnYTIFMPblzCUA,2616
342
343
  maxframe/lib/mmh3_src/mmh3module.cpp,sha256=9J9eA42eKWTl546fvfQPNuIM3B2jpWSADpgIw3tr2jg,11604
343
344
  maxframe/lib/mmh3_src/MurmurHash3.h,sha256=lg5uXUFyMBge2BWRn0FgrqaCFCMfDWoTXD4PQtjHrMA,1263
@@ -346,7 +347,7 @@ maxframe/lib/tests/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZH
346
347
  maxframe/lib/tests/test_wrapped_pickle.py,sha256=oz1RLwHSZstXgw4caNeaD0ZgQZvkzDLsx7hFN-NvP7U,1524
347
348
  maxframe/lib/cython/__init__.py,sha256=FEFOVLi3o2GpZoedTtLYvbie0eQBehJIjtCrWca2ZHw,596
348
349
  maxframe/lib/cython/libcpp.pxd,sha256=2o9HWtyCMtN9xk7WH_mq1i89IbMPwfZA8yHETjRALXs,1100
349
- maxframe/lib/aio/isolation.py,sha256=p2QuFBVe1ktX8uVFbu6_U1yNzbhBNpFZ9f4A6-Y6XHo,2628
350
+ maxframe/lib/aio/isolation.py,sha256=2nA16GdOXEUNVVdxQXbmU4YvY6wPG2oSV4z1x9uW6Gw,2761
350
351
  maxframe/lib/aio/__init__.py,sha256=1_nx7d5AqXRwa7ODzVfL8gH-tsDAH4YyY-JFPC8TA6w,936
351
352
  maxframe/lib/aio/_threads.py,sha256=tr7FYViGT7nyR7HRw3vE3W-9r3-dZ1IP_Kbhe9sgqpw,1328
352
353
  maxframe/lib/aio/file.py,sha256=aZF8NkkccsVsOniWMBiPqPSk48bb7zGRPB2BegWRaqM,2012
@@ -386,7 +387,7 @@ maxframe/lib/tblib/cpython.py,sha256=FQ0f6WTQyQHoMRhgPqrA0y0Ygxlbj5IC53guxA4h9Cw
386
387
  maxframe/lib/tblib/decorators.py,sha256=bcllK3kVuPnj6SNZGmlJGxTK0ovdt7TJDXrhA4UE5sQ,1063
387
388
  maxframe/tensor/array_utils.py,sha256=259vG4SjyhiheARCZeEnfJdZjoojyrELn41oRcyAELs,4943
388
389
  maxframe/tensor/__init__.py,sha256=-kir8LUsXCDGcc7YdKqWgNEHSrgU_HE5uPam0jLLP6g,3511
389
- maxframe/tensor/core.py,sha256=x9HRQbMjhXJTtPQjKhkN42qZaPQIR7uvkMkOpqUspL8,21920
390
+ maxframe/tensor/core.py,sha256=Ojxaf5b8sJ6ZZGezyFHQJ5XsSpUrBOnZgFeUQgpVJpI,21914
390
391
  maxframe/tensor/utils.py,sha256=bwVN0iuVic1tpFai6Hk-1tQLqckQ2IYS7yZKMTcOU1I,22914
391
392
  maxframe/tensor/operators.py,sha256=iGkDIRz152gXrPb5JbqOvXngpq3QaCg-aNO4gHZPLN0,3461
392
393
  maxframe/tensor/statistics/quantile.py,sha256=UFzTmBwgNL7k_QOJ84qPfycQrW8MyOa1gcp-uFsylIY,9484
@@ -14,6 +14,7 @@
14
14
 
15
15
  from typing import Any, Dict, List
16
16
 
17
+ import msgpack
17
18
  from tornado import httpclient
18
19
 
19
20
  from maxframe.core import TileableGraph
@@ -28,7 +29,6 @@ from maxframe.protocol import (
28
29
  )
29
30
  from maxframe.typing_ import TimeoutType
30
31
  from maxframe.utils import (
31
- deserialize_serializable,
32
32
  format_timeout_params,
33
33
  serialize_serializable,
34
34
  wait_http_response,
@@ -47,12 +47,12 @@ class FrameDriverClient:
47
47
  resp = await httpclient.AsyncHTTPClient().fetch(
48
48
  req_url, method="POST", body=serialize_serializable(req_body)
49
49
  )
50
- return deserialize_serializable(resp.body).body
50
+ return SessionInfo.from_json(msgpack.loads(resp.body))
51
51
 
52
52
  async def get_session(self, session_id: str) -> SessionInfo:
53
53
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
54
54
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
55
- return deserialize_serializable(resp.body).body
55
+ return SessionInfo.from_json(msgpack.loads(resp.body))
56
56
 
57
57
  async def delete_session(self, session_id: str):
58
58
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
@@ -71,12 +71,12 @@ class FrameDriverClient:
71
71
  method="POST",
72
72
  body=serialize_serializable(ProtocolBody(body=req_body)),
73
73
  )
74
- return deserialize_serializable(resp.body).body
74
+ return DagInfo.from_json(msgpack.loads(resp.body))
75
75
 
76
76
  async def get_dag_info(self, session_id: str, dag_id: str) -> DagInfo:
77
77
  req_url = f"{self._endpoint}/api/sessions/{session_id}/dags/{dag_id}"
78
78
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
79
- return deserialize_serializable(resp.body).body
79
+ return DagInfo.from_json(msgpack.loads(resp.body))
80
80
 
81
81
  async def wait_dag(self, session_id: str, dag_id: str, timeout: TimeoutType = None):
82
82
  query_part = format_timeout_params(timeout)
@@ -87,7 +87,7 @@ class FrameDriverClient:
87
87
  resp = await wait_http_response(
88
88
  req_url, method="GET", request_timeout=timeout
89
89
  )
90
- info = deserialize_serializable(resp.body).body
90
+ info = DagInfo.from_json(msgpack.loads(resp.body))
91
91
  except TimeoutError:
92
92
  info = await self.get_dag_info(session_id, dag_id)
93
93
  return info
@@ -103,7 +103,7 @@ class FrameDriverClient:
103
103
  resp = await wait_http_response(
104
104
  req_url, method="DELETE", request_timeout=timeout
105
105
  )
106
- info = deserialize_serializable(resp.body).body
106
+ info = DagInfo.from_json(msgpack.loads(resp.body))
107
107
  except TimeoutError:
108
108
  info = await self.get_dag_info(session_id, dag_id)
109
109
  return info
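
The driver client now decodes HTTP responses as msgpack-encoded JSON documents instead of maxframe's binary serialization; condensed, the new decode path looks like the sketch below (resp stands for a tornado HTTPResponse, and decode_dag_info is a hypothetical helper, not part of the package):

    import msgpack
    from maxframe.protocol import DagInfo

    def decode_dag_info(resp) -> DagInfo:
        # the response body is a msgpack map mirroring DagInfo.to_json()
        return DagInfo.from_json(msgpack.loads(resp.body))
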
@@ -112,13 +112,19 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
112
112
  odps_entry: ODPS,
113
113
  task_name: Optional[str] = None,
114
114
  project: Optional[str] = None,
115
- priority: Optional[str] = None,
115
+ priority: Optional[int] = None,
116
116
  running_cluster: Optional[str] = None,
117
117
  nested_instance_id: Optional[str] = None,
118
118
  major_version: Optional[str] = None,
119
119
  output_format: Optional[str] = None,
120
120
  **kwargs,
121
121
  ):
122
+ if callable(odps_options.get_priority):
123
+ default_priority = odps_options.get_priority(odps_entry)
124
+ else:
125
+ default_priority = odps_options.priority
126
+ priority = priority if priority is not None else default_priority
127
+
122
128
  self._odps_entry = odps_entry
123
129
  self._task_name = task_name
124
130
  self._project = project
@@ -126,6 +132,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
126
132
  self._running_cluster = running_cluster
127
133
  self._major_version = major_version
128
134
  self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
135
+
129
136
  if nested_instance_id is None:
130
137
  self._nested = False
131
138
  self._instance = None
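
The priority handling added to MaxFrameInstanceCaller.__init__ boils down to the precedence condensed below (resolve_priority is a hypothetical helper used only to summarize the logic): an explicit priority argument wins, then odps options.get_priority(entry) when it is callable, then the static odps options.priority value, matching the cases exercised by the updated test_task.py.

    from odps import ODPS, options as odps_options

    def resolve_priority(priority, odps_entry: ODPS):
        # hypothetical condensation of the logic inlined in __init__
        if priority is not None:
            return priority
        if callable(odps_options.get_priority):
            return odps_options.get_priority(odps_entry)
        return odps_options.priority
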
@@ -18,6 +18,7 @@ import os
18
18
  import mock
19
19
  from defusedxml import ElementTree
20
20
  from odps import ODPS
21
+ from odps import options as odps_options
21
22
 
22
23
  from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
23
24
  from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
@@ -27,17 +28,20 @@ expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
27
28
 
28
29
  def test_maxframe_instance_caller_creating_session():
29
30
  o = ODPS.from_environments()
30
- task_caller = MaxFrameInstanceCaller(
31
- odps_entry=o,
32
- task_name="task_test",
33
- major_version="test_version",
34
- output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
35
- priority="100",
36
- running_cluster="test_cluster",
37
- )
31
+
32
+ def create_caller(**kwargs):
33
+ kw = dict(
34
+ odps_entry=o,
35
+ task_name="task_test",
36
+ major_version="test_version",
37
+ output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
38
+ running_cluster="test_cluster",
39
+ )
40
+ kw.update(**kwargs)
41
+ return MaxFrameInstanceCaller(**kw)
38
42
 
39
43
  def mock_create(self, task: MaxFrameTask, priority=None, running_cluster=None):
40
- assert priority == "100"
44
+ assert priority == 100
41
45
  assert running_cluster == "test_cluster"
42
46
  root = ElementTree.parse(
43
47
  os.path.join(expected_file_dir, "create_session.xml")
@@ -62,6 +66,20 @@ def test_maxframe_instance_caller_creating_session():
62
66
  target="maxframe_client.session.task.MaxFrameInstanceCaller",
63
67
  _wait_instance_task_ready=mock.DEFAULT,
64
68
  get_session=mock.DEFAULT,
65
- ):
66
- with mock.patch("odps.models.instances.BaseInstances.create", mock_create):
69
+ ), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
70
+ task_caller = create_caller(priority=100)
71
+ task_caller.create_session()
72
+
73
+ old_priority = odps_options.priority
74
+ old_get_priority = odps_options.get_priority
75
+ try:
76
+ task_caller = create_caller(priority=100)
77
+ odps_options.priority = 100
78
+ task_caller.create_session()
79
+
80
+ odps_options.priority = None
81
+ odps_options.get_priority = lambda _: 100
67
82
  task_caller.create_session()
83
+ finally:
84
+ odps_options.priority = old_priority
85
+ odps_options.get_priority = old_get_priority
@@ -24,6 +24,7 @@ from odps import ODPS
24
24
  import maxframe.dataframe as md
25
25
  import maxframe.remote as mr
26
26
  from maxframe.core import ExecutableTuple, TileableGraph
27
+ from maxframe.lib.aio import stop_isolation
27
28
  from maxframe.protocol import ResultInfo
28
29
  from maxframe.serialization import RemoteException
29
30
  from maxframe.session import new_session
@@ -52,6 +53,7 @@ def start_mock_session(framedriver_app): # noqa: F811
52
53
  time.sleep(5) # Wait for temp table deleted
53
54
  else:
54
55
  session.reset_default()
56
+ stop_isolation()
55
57
 
56
58
 
57
59
  def test_simple_run_dataframe(start_mock_session):