maxframe 0.1.0b2__cp39-cp39-win_amd64.whl → 0.1.0b3__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Binary file
maxframe/codegen.py CHANGED
@@ -17,7 +17,7 @@ import base64
17
17
  import dataclasses
18
18
  import logging
19
19
  from enum import Enum
20
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type
20
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
21
21
 
22
22
  from odps.types import OdpsSchema
23
23
  from odps.utils import camel_to_underline
@@ -30,6 +30,7 @@ from .odpsio import build_dataframe_table_meta
30
30
  from .odpsio.schema import pandas_to_odps_schema
31
31
  from .protocol import DataFrameTableMeta, ResultInfo
32
32
  from .serialization import PickleContainer
33
+ from .serialization.serializables import Serializable, StringField
33
34
  from .typing_ import PandasObjectTypes
34
35
  from .udf import MarkedFunction
35
36
 
@@ -48,8 +49,11 @@ class CodeGenResult:
48
49
  constants: Dict[str, Any]
49
50
 
50
51
 
51
- class AbstractUDF(abc.ABC):
52
- _session_id: str
52
+ class AbstractUDF(Serializable):
53
+ _session_id: str = StringField("session_id")
54
+
55
+ def __init__(self, session_id: Optional[str] = None, **kw):
56
+ super().__init__(_session_id=session_id, **kw)
53
57
 
54
58
  @property
55
59
  def name(self) -> str:
@@ -74,7 +78,66 @@ class AbstractUDF(abc.ABC):
74
78
 
75
79
  class UserCodeMixin:
76
80
  @classmethod
77
- def generate_pickled_codes(cls, code_to_pickle: Any) -> List[str]:
81
+ def obj_to_python_expr(cls, obj: Any = None) -> str:
82
+ """
83
+ Parameters
84
+ ----------
85
+ obj
86
+ The object to convert to python expr.
87
+ Returns
88
+ -------
89
+ str :
90
+ The str type content equals to the object when use in the python code directly.
91
+ """
92
+ if obj is None:
93
+ return "None"
94
+
95
+ if isinstance(obj, (int, float)):
96
+ return repr(obj)
97
+
98
+ if isinstance(obj, bool):
99
+ return "True" if obj else "False"
100
+
101
+ if isinstance(obj, bytes):
102
+ base64_bytes = base64.b64encode(obj)
103
+ return f"base64.b64decode({base64_bytes})"
104
+
105
+ if isinstance(obj, str):
106
+ return repr(obj)
107
+
108
+ if isinstance(obj, list):
109
+ return (
110
+ f"[{', '.join([cls.obj_to_python_expr(element) for element in obj])}]"
111
+ )
112
+
113
+ if isinstance(obj, dict):
114
+ items = (
115
+ f"{repr(key)}: {cls.obj_to_python_expr(value)}"
116
+ for key, value in obj.items()
117
+ )
118
+ return f"{{{', '.join(items)}}}"
119
+
120
+ if isinstance(obj, tuple):
121
+ return f"({', '.join([cls.obj_to_python_expr(sub_obj) for sub_obj in obj])}{',' if len(obj) == 1 else ''})"
122
+
123
+ if isinstance(obj, set):
124
+ return (
125
+ f"{{{', '.join([cls.obj_to_python_expr(sub_obj) for sub_obj in obj])}}}"
126
+ if obj
127
+ else "set()"
128
+ )
129
+
130
+ if isinstance(obj, PickleContainer):
131
+ return UserCodeMixin.generate_pickled_codes(obj, None)
132
+
133
+ raise ValueError(f"not support arg type {type(obj)}")
134
+
135
+ @classmethod
136
+ def generate_pickled_codes(
137
+ cls,
138
+ code_to_pickle: Any,
139
+ unpicked_data_var_name: Union[str, None] = "pickled_data",
140
+ ) -> str:
78
141
  """
79
142
  Generate pickled codes. The final pickled variable is called 'pickled_data'.
80
143
 
@@ -82,20 +145,20 @@ class UserCodeMixin:
82
145
  ----------
83
146
  code_to_pickle: Any
84
147
  The code to be pickled.
148
+ unpicked_data_var_name: str
149
+ The variables in code used to hold the loads object from the cloudpickle
85
150
 
86
151
  Returns
87
152
  -------
88
- List[str] :
89
- The code snippets of pickling, the final variable is called 'pickled_data'.
153
+ str :
154
+ The code snippets of pickling, the final variable is called 'pickled_data' by default.
90
155
  """
91
156
  pickled, buffers = cls.dump_pickled_data(code_to_pickle)
92
- pickled = base64.b64encode(pickled)
93
- buffers = [base64.b64encode(b) for b in buffers]
94
- buffers_str = ", ".join(f"base64.b64decode(b'{b.decode()}')" for b in buffers)
95
- return [
96
- f"base64_data = base64.b64decode(b'{pickled.decode()}')",
97
- f"pickled_data = cloudpickle.loads(base64_data, buffers=[{buffers_str}])",
98
- ]
157
+ pickle_loads_expr = f"cloudpickle.loads({cls.obj_to_python_expr(pickled)}, buffers={cls.obj_to_python_expr(buffers)})"
158
+ if unpicked_data_var_name:
159
+ return f"{unpicked_data_var_name} = {pickle_loads_expr}"
160
+
161
+ return pickle_loads_expr
99
162
 
100
163
  @staticmethod
101
164
  def dump_pickled_data(
@@ -114,8 +177,9 @@ class UserCodeMixin:
114
177
 
115
178
 
116
179
  class BigDagCodeContext(metaclass=abc.ABCMeta):
117
- def __init__(self, session_id: str = None):
180
+ def __init__(self, session_id: str = None, subdag_id: str = None):
118
181
  self._session_id = session_id
182
+ self._subdag_id = subdag_id
119
183
  self._tileable_key_to_variables = dict()
120
184
  self.constants = dict()
121
185
  self._data_table_meta_cache = dict()
@@ -142,10 +206,14 @@ class BigDagCodeContext(metaclass=abc.ABCMeta):
142
206
  except KeyError:
143
207
  var_name = self._tileable_key_to_variables[
144
208
  tileable.key
145
- ] = f"var_{self._next_var_id}"
146
- self._next_var_id += 1
209
+ ] = self.next_var_name()
147
210
  return var_name
148
211
 
212
+ def next_var_name(self) -> str:
213
+ var_name = f"var_{self._next_var_id}"
214
+ self._next_var_id += 1
215
+ return var_name
216
+
149
217
  def get_odps_schema(
150
218
  self, data: PandasObjectTypes, unknown_as_string: bool = False
151
219
  ) -> OdpsSchema:
@@ -275,9 +343,10 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
275
343
  engine_priority: int = 0
276
344
  _extension_loaded = False
277
345
 
278
- def __init__(self, session_id: str):
346
+ def __init__(self, session_id: str, subdag_id: str = None):
279
347
  self._session_id = session_id
280
- self._context = self._init_context(session_id)
348
+ self._subdag_id = subdag_id
349
+ self._context = self._init_context(session_id, subdag_id)
281
350
 
282
351
  @classmethod
283
352
  def _load_engine_extensions(cls):
@@ -307,7 +376,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
307
376
  raise NotImplementedError
308
377
 
309
378
  @abc.abstractmethod
310
- def _init_context(self, session_id: str) -> BigDagCodeContext:
379
+ def _init_context(self, session_id: str, subdag_id: str) -> BigDagCodeContext:
311
380
  raise NotImplementedError
312
381
 
313
382
  def _generate_comments(
maxframe/config/config.py CHANGED
@@ -340,6 +340,12 @@ default_options.register_option(
340
340
  validator=is_integer,
341
341
  remote=True,
342
342
  )
343
+ default_options.register_option(
344
+ "session.subinstance_priority",
345
+ None,
346
+ validator=any_validator(is_null, is_integer),
347
+ remote=True,
348
+ )
343
349
 
344
350
  default_options.register_option("warn_duplicated_execution", False, validator=is_bool)
345
351
  default_options.register_option("dataframe.use_arrow_dtype", True, validator=is_bool)
@@ -66,6 +66,7 @@ class DecrefRunner:
66
66
  if self._decref_thread: # pragma: no branch
67
67
  self._queue.put_nowait((None, None, None))
68
68
  self._decref_thread.join(1)
69
+ self._decref_thread = None
69
70
 
70
71
  def put(self, key: str, session_ref: ref):
71
72
  if self._decref_thread is None:
@@ -15,6 +15,7 @@
15
15
  from typing import Any, Dict
16
16
 
17
17
  from ...serialization.serializables import FieldTypes, ListField
18
+ from ...utils import skip_na_call
18
19
  from .chunks import Chunk, ChunkData
19
20
  from .core import Entity
20
21
  from .executable import _ToObjectMixin
@@ -62,8 +63,8 @@ class ObjectData(TileableData, _ToObjectMixin):
62
63
  _chunks = ListField(
63
64
  "chunks",
64
65
  FieldTypes.reference(ObjectChunkData),
65
- on_serialize=lambda x: [it.data for it in x] if x is not None else x,
66
- on_deserialize=lambda x: [ObjectChunk(it) for it in x] if x is not None else x,
66
+ on_serialize=skip_na_call(lambda x: [it.data for it in x]),
67
+ on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
67
68
  )
68
69
 
69
70
  def __init__(self, op=None, nsplits=None, **kw):
Binary file
@@ -39,6 +39,7 @@ from .datasource.read_odps_query import read_odps_query
39
39
  from .datasource.read_odps_table import read_odps_table
40
40
  from .datasource.read_parquet import read_parquet
41
41
  from .datastore.to_odps import to_odps_table
42
+ from .groupby import NamedAgg
42
43
  from .initializer import DataFrame, Index, Series, read_pandas
43
44
  from .merge import concat, merge
44
45
  from .misc.cut import cut
@@ -52,7 +53,7 @@ from .reduction import CustomReduction, unique
52
53
  from .tseries.to_datetime import to_datetime
53
54
 
54
55
  try:
55
- from pandas import NA, NamedAgg, Timestamp
56
+ from pandas import NA, Timestamp
56
57
  except ImportError: # pragma: no cover
57
58
  pass
58
59
 
@@ -46,7 +46,7 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
46
46
  r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
47
47
  re.MULTILINE,
48
48
  )
49
- _EXPLAIN_COLUMN_REGEX = re.compile(r"([^ ]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
49
+ _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
50
50
 
51
51
 
52
52
  @dataclasses.dataclass
@@ -69,7 +69,7 @@ class DataFrameReadODPSTable(
69
69
  return getattr(self, "partition_spec", None)
70
70
 
71
71
  def get_columns(self):
72
- return self.columns
72
+ return self.columns or list(self.dtypes.index)
73
73
 
74
74
  def set_pruned_columns(self, columns, *, keep_order=None): # pragma: no cover
75
75
  self.columns = columns
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import os
15
16
  from collections import OrderedDict
16
17
 
17
18
  import numpy as np
@@ -33,6 +34,7 @@ from ..from_tensor import (
33
34
  )
34
35
  from ..index import from_pandas as from_pandas_index
35
36
  from ..index import from_tileable
37
+ from ..read_odps_query import ColumnSchema, _resolve_task_sector
36
38
  from ..series import from_pandas as from_pandas_series
37
39
 
38
40
  ray = lazy_import("ray")
@@ -228,6 +230,7 @@ def test_from_odps_table():
228
230
  assert df.op.table_name == test_table.full_table_name
229
231
  assert df.index_value.name is None
230
232
  assert isinstance(df.index_value.value, IndexValue.RangeIndex)
233
+ assert df.op.get_columns() == ["col1", "col2", "col3"]
231
234
  pd.testing.assert_series_equal(
232
235
  df.dtypes,
233
236
  pd.Series(
@@ -247,6 +250,7 @@ def test_from_odps_table():
247
250
  assert df.op.table_name == test_table.full_table_name
248
251
  assert df.index_value.name is None
249
252
  assert isinstance(df.index_value.value, IndexValue.RangeIndex)
253
+ assert df.op.get_columns() == ["col1", "col2"]
250
254
  pd.testing.assert_series_equal(
251
255
  df.dtypes,
252
256
  pd.Series([np.dtype("O"), np.dtype("int64")], index=["col1", "col2"]),
@@ -257,6 +261,7 @@ def test_from_odps_table():
257
261
  assert df.index_value.name == "col1"
258
262
  assert isinstance(df.index_value.value, IndexValue.Index)
259
263
  assert df.index.dtype == np.dtype("O")
264
+ assert df.op.get_columns() == ["col2", "col3"]
260
265
  pd.testing.assert_series_equal(
261
266
  df.dtypes,
262
267
  pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
@@ -267,6 +272,7 @@ def test_from_odps_table():
267
272
 
268
273
  df = read_odps_table(test_parted_table, append_partitions=True)
269
274
  assert df.op.append_partitions is True
275
+ assert df.op.get_columns() == ["col1", "col2", "col3", "pt"]
270
276
  pd.testing.assert_series_equal(
271
277
  df.dtypes,
272
278
  pd.Series(
@@ -280,6 +286,7 @@ def test_from_odps_table():
280
286
  )
281
287
  assert df.op.append_partitions is True
282
288
  assert df.op.partitions == ["pt=20240103"]
289
+ assert df.op.get_columns() == ["col1", "col2", "pt"]
283
290
  pd.testing.assert_series_equal(
284
291
  df.dtypes,
285
292
  pd.Series(
@@ -377,3 +384,18 @@ def test_date_range():
377
384
  assert dr.index_value.is_unique == expected.is_unique
378
385
  assert dr.index_value.is_monotonic_increasing == expected.is_monotonic_increasing
379
386
  assert dr.name == expected.name
387
+
388
+
389
+ def test_resolve_task_sector():
390
+ input_path = os.path.join(os.path.dirname(__file__), "test-data", "task-input.txt")
391
+ with open(input_path, "r") as f:
392
+ sector = f.read()
393
+ actual_sector = _resolve_task_sector("job0", sector)
394
+
395
+ assert actual_sector.job_name == "job0"
396
+ assert actual_sector.task_name == "M1"
397
+ assert actual_sector.output_target == "Screen"
398
+ assert len(actual_sector.schema) == 78
399
+ assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
400
+ assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
401
+ assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
@@ -14,6 +14,7 @@
14
14
 
15
15
  # noinspection PyUnresolvedReferences
16
16
  from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
17
+ from .core import NamedAgg
17
18
 
18
19
 
19
20
  def _install():
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from collections import namedtuple
16
+
15
17
  import pandas as pd
16
18
 
17
19
  from ... import opcodes
@@ -30,6 +32,9 @@ _GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)
30
32
  _default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
31
33
 
32
34
 
35
+ NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
36
+
37
+
33
38
  class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
34
39
  _op_type_ = opcodes.GROUPBY
35
40
 
@@ -14,11 +14,14 @@
14
14
 
15
15
  import asyncio
16
16
  import atexit
17
+ import itertools
17
18
  import threading
18
19
  from typing import Dict, Optional
19
20
 
20
21
 
21
22
  class Isolation:
23
+ _counter = itertools.count().__next__
24
+
22
25
  loop: asyncio.AbstractEventLoop
23
26
  _stopped: Optional[asyncio.Event]
24
27
  _thread: Optional[threading.Thread]
@@ -38,7 +41,9 @@ class Isolation:
38
41
 
39
42
  def start(self):
40
43
  if self._threaded:
41
- self._thread = thread = threading.Thread(target=self._run)
44
+ self._thread = thread = threading.Thread(
45
+ name=f"IsolationThread-{self._counter()}", target=self._run
46
+ )
42
47
  thread.daemon = True
43
48
  thread.start()
44
49
  self._thread_ident = thread.ident
Binary file
maxframe/protocol.py CHANGED
@@ -46,6 +46,8 @@ BodyType = TypeVar("BodyType", bound="Serializable")
46
46
 
47
47
 
48
48
  class JsonSerializable(Serializable):
49
+ _ignore_non_existing_keys = True
50
+
49
51
  @classmethod
50
52
  def from_json(cls, serialized: dict) -> "JsonSerializable":
51
53
  raise NotImplementedError
@@ -245,6 +247,8 @@ class DagInfo(JsonSerializable):
245
247
  default_factory=dict,
246
248
  )
247
249
  error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
250
+ start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
251
+ end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
248
252
 
249
253
  @classmethod
250
254
  def from_json(cls, serialized: dict) -> "DagInfo":
@@ -265,7 +269,10 @@ class DagInfo(JsonSerializable):
265
269
  "dag_id": self.dag_id,
266
270
  "status": self.status.value,
267
271
  "progress": self.progress,
272
+ "start_timestamp": self.start_timestamp,
273
+ "end_timestamp": self.end_timestamp,
268
274
  }
275
+ ret = {k: v for k, v in ret.items() if v is not None}
269
276
  if self.tileable_to_result_infos:
270
277
  ret["tileable_to_result_infos"] = {
271
278
  k: v.to_json() for k, v in self.tileable_to_result_infos.items()
@@ -112,6 +112,7 @@ class Serializable(metaclass=SerializableMeta):
112
112
  __slots__ = ("__weakref__",)
113
113
 
114
114
  _cache_primitive_serial = False
115
+ _ignore_non_existing_keys = False
115
116
 
116
117
  _FIELDS: Dict[str, Field]
117
118
  _FIELD_ORDER: List[str]
@@ -128,7 +129,11 @@ class Serializable(metaclass=SerializableMeta):
128
129
  else:
129
130
  values = kwargs
130
131
  for k, v in values.items():
131
- fields[k].set(self, v)
132
+ try:
133
+ fields[k].set(self, v)
134
+ except KeyError:
135
+ if not self._ignore_non_existing_keys:
136
+ raise
132
137
 
133
138
  def __on_deserialize__(self):
134
139
  pass
@@ -507,12 +507,14 @@ class ReferenceField(Field):
507
507
  tag: str,
508
508
  reference_type: Union[str, Type] = None,
509
509
  default: Any = no_default,
510
+ default_factory: Optional[Callable] = None,
510
511
  on_serialize: Callable[[Any], Any] = None,
511
512
  on_deserialize: Callable[[Any], Any] = None,
512
513
  ):
513
514
  super().__init__(
514
515
  tag,
515
516
  default=default,
517
+ default_factory=default_factory,
516
518
  on_serialize=on_serialize,
517
519
  on_deserialize=on_deserialize,
518
520
  )
maxframe/tensor/core.py CHANGED
@@ -43,7 +43,7 @@ from ..serialization.serializables import (
43
43
  StringField,
44
44
  TupleField,
45
45
  )
46
- from ..utils import on_deserialize_shape, on_serialize_shape
46
+ from ..utils import on_deserialize_shape, on_serialize_shape, skip_na_call
47
47
  from .utils import fetch_corner_data, get_chunk_slices
48
48
 
49
49
  logger = logging.getLogger(__name__)
@@ -181,8 +181,8 @@ class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
181
181
  _chunks = ListField(
182
182
  "chunks",
183
183
  FieldTypes.reference(TensorChunkData),
184
- on_serialize=lambda x: [it.data for it in x] if x is not None else x,
185
- on_deserialize=lambda x: [TensorChunk(it) for it in x] if x is not None else x,
184
+ on_serialize=skip_na_call(lambda x: [it.data for it in x]),
185
+ on_deserialize=skip_na_call(lambda x: [TensorChunk(it) for it in x]),
186
186
  )
187
187
 
188
188
  def __init__(
@@ -0,0 +1,69 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import base64
17
+ from typing import List, Tuple
18
+
19
+ # 使用pytest生成单元测试
20
+ import pytest
21
+
22
+ from maxframe.codegen import UserCodeMixin
23
+ from maxframe.lib import wrapped_pickle
24
+ from maxframe.serialization.core import PickleContainer
25
+
26
+
27
+ @pytest.mark.parametrize(
28
+ "input_obj, expected_output",
29
+ [
30
+ (None, "None"),
31
+ (10, "10"),
32
+ (3.14, "3.14"),
33
+ (True, "True"),
34
+ (False, "False"),
35
+ (b"hello", "base64.b64decode(b'aGVsbG8=')"),
36
+ ("hello", "'hello'"),
37
+ ([1, 2, 3], "[1, 2, 3]"),
38
+ ({"a": 1, "b": 2}, "{'a': 1, 'b': 2}"),
39
+ ((1, 2, 3), "(1, 2, 3)"),
40
+ ((1,), "(1,)"),
41
+ ((), "()"),
42
+ ({1, 2, 3}, "{1, 2, 3}"),
43
+ (set(), "set()"),
44
+ ],
45
+ )
46
+ def test_obj_to_python_expr(input_obj, expected_output):
47
+ assert UserCodeMixin.obj_to_python_expr(input_obj) == expected_output
48
+
49
+
50
+ def test_obj_to_python_expr_custom_object():
51
+ class CustomClass:
52
+ def __init__(self, a: int, b: List[int], c: Tuple[int, int]):
53
+ self.a = a
54
+ self.b = b
55
+ self.c = c
56
+
57
+ custom_obj = CustomClass(1, [2, 3], (4, 5))
58
+ pickle_data = wrapped_pickle.dumps(custom_obj)
59
+ pickle_str = base64.b64encode(pickle_data)
60
+ custom_obj_pickle_container = PickleContainer([pickle_data])
61
+
62
+ # with class obj will not support currently
63
+ with pytest.raises(ValueError):
64
+ UserCodeMixin.obj_to_python_expr(custom_obj)
65
+
66
+ assert (
67
+ UserCodeMixin.obj_to_python_expr(custom_obj_pickle_container)
68
+ == f"cloudpickle.loads(base64.b64decode({pickle_str}), buffers=[])"
69
+ )
@@ -11,6 +11,8 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
15
+ import json
14
16
  import time
15
17
 
16
18
  import pytest
@@ -29,28 +31,32 @@ from ..serialization import RemoteException
29
31
  from ..utils import deserialize_serializable, serialize_serializable
30
32
 
31
33
 
34
+ def _json_round_trip(json_data: dict) -> dict:
35
+ return json.loads(json.dumps(json_data))
36
+
37
+
32
38
  def test_result_info_json_serialize():
33
- ri = ResultInfo.from_json(ResultInfo().to_json())
39
+ ri = ResultInfo.from_json(_json_round_trip(ResultInfo().to_json()))
34
40
  assert type(ri) is ResultInfo
35
41
 
36
42
  ri = ODPSTableResultInfo(
37
43
  full_table_name="table_name", partition_specs=["pt=partition"]
38
44
  )
39
- deserial_ri = ResultInfo.from_json(ri.to_json())
45
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
40
46
  assert type(ri) is ODPSTableResultInfo
41
47
  assert ri.result_type == deserial_ri.result_type
42
48
  assert ri.full_table_name == deserial_ri.full_table_name
43
49
  assert ri.partition_specs == deserial_ri.partition_specs
44
50
 
45
51
  ri = ODPSTableResultInfo(full_table_name="table_name")
46
- deserial_ri = ResultInfo.from_json(ri.to_json())
52
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
47
53
  assert type(ri) is ODPSTableResultInfo
48
54
  assert ri.result_type == deserial_ri.result_type
49
55
  assert ri.full_table_name == deserial_ri.full_table_name
50
56
  assert ri.partition_specs == deserial_ri.partition_specs
51
57
 
52
58
  ri = ODPSVolumeResultInfo(volume_name="vol_name", volume_path="vol_path")
53
- deserial_ri = ResultInfo.from_json(ri.to_json())
59
+ deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
54
60
  assert type(ri) is ODPSVolumeResultInfo
55
61
  assert ri.result_type == deserial_ri.result_type
56
62
  assert ri.volume_name == deserial_ri.volume_name
@@ -63,7 +69,7 @@ def test_error_info_json_serialize():
63
69
  except ValueError as ex:
64
70
  err_info = ErrorInfo.from_exception(ex)
65
71
 
66
- deserial_err_info = ErrorInfo.from_json(err_info.to_json())
72
+ deserial_err_info = ErrorInfo.from_json(_json_round_trip(err_info.to_json()))
67
73
  assert deserial_err_info.error_messages == err_info.error_messages
68
74
  assert isinstance(deserial_err_info.raw_error_data, ValueError)
69
75
 
@@ -73,7 +79,7 @@ def test_error_info_json_serialize():
73
79
  with pytest.raises(RemoteException):
74
80
  mf_err_info.reraise()
75
81
 
76
- deserial_err_info = ErrorInfo.from_json(mf_err_info.to_json())
82
+ deserial_err_info = ErrorInfo.from_json(_json_round_trip(mf_err_info.to_json()))
77
83
  assert isinstance(deserial_err_info.raw_error_data, ValueError)
78
84
  with pytest.raises(ValueError):
79
85
  deserial_err_info.reraise()
@@ -94,7 +100,9 @@ def test_dag_info_json_serialize():
94
100
  },
95
101
  error_info=err_info,
96
102
  )
97
- deserial_info = DagInfo.from_json(info.to_json())
103
+ json_info = info.to_json()
104
+ json_info["non_existing_field"] = "non_existing"
105
+ deserial_info = DagInfo.from_json(_json_round_trip(json_info))
98
106
  assert deserial_info.session_id == info.session_id
99
107
  assert deserial_info.dag_id == info.dag_id
100
108
  assert deserial_info.status == info.status
@@ -121,7 +129,7 @@ def test_session_info_json_serialize():
121
129
  idle_timestamp=None,
122
130
  dag_infos={"test_dag_id": dag_info},
123
131
  )
124
- deserial_info = SessionInfo.from_json(info.to_json())
132
+ deserial_info = SessionInfo.from_json(_json_round_trip(info.to_json()))
125
133
  assert deserial_info.session_id == info.session_id
126
134
  assert deserial_info.settings == info.settings
127
135
  assert deserial_info.start_timestamp == info.start_timestamp
maxframe/tests/utils.py CHANGED
@@ -104,6 +104,7 @@ def run_app_in_thread(app_func):
104
104
  q = queue.Queue()
105
105
  exit_event = asyncio.Event(loop=app_loop)
106
106
  app_thread = Thread(
107
+ name="TestAppThread",
107
108
  target=app_thread_func,
108
109
  args=(app_loop, q, exit_event, args, kwargs),
109
110
  )
maxframe/utils.py CHANGED
@@ -338,6 +338,14 @@ def deserialize_serializable(ser_serializable: bytes):
338
338
  return deserialize(header2, buffers2)
339
339
 
340
340
 
341
+ def skip_na_call(func: Callable):
342
+ @functools.wraps(func)
343
+ def new_func(x):
344
+ return func(x) if x is not None else None
345
+
346
+ return new_func
347
+
348
+
341
349
  def url_path_join(*pieces):
342
350
  """Join components of url into a relative url
343
351
 
@@ -450,6 +458,9 @@ _ToThreadRetType = TypeVar("_ToThreadRetType")
450
458
 
451
459
 
452
460
  class ToThreadMixin:
461
+ _thread_pool_size = 1
462
+ _counter = itertools.count().__next__
463
+
453
464
  def __del__(self):
454
465
  if hasattr(self, "_pool"):
455
466
  kw = {"wait": False}
@@ -466,7 +477,10 @@ class ToThreadMixin:
466
477
  **kwargs,
467
478
  ) -> _ToThreadRetType:
468
479
  if not hasattr(self, "_pool"):
469
- self._pool = concurrent.futures.ThreadPoolExecutor(1)
480
+ self._pool = concurrent.futures.ThreadPoolExecutor(
481
+ self._thread_pool_size,
482
+ thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
483
+ )
470
484
 
471
485
  task = asyncio.create_task(
472
486
  to_thread_pool(func, *args, **kwargs, pool=self._pool)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 0.1.0b2
3
+ Version: 0.1.0b3
4
4
  Summary: MaxFrame operator-based data analyze framework
5
5
  Requires-Dist: numpy >=1.19.0
6
6
  Requires-Dist: pandas >=1.0.0
@@ -1,21 +1,21 @@
1
1
  maxframe/__init__.py,sha256=MgltwhBvnUQDKKmHDg9Y69TJkRGmQQ9m8-D9FC2pcLU,1007
2
- maxframe/_utils.cp39-win_amd64.pyd,sha256=cu_NBL6jXWstApreZwklFX4eW8vFdG5SKUpWbQ96uDQ,305664
2
+ maxframe/_utils.cp39-win_amd64.pyd,sha256=Ig5N-JrpF21quW8LFsdBy--PdWcJwPzBLp1x6LSnp9g,305664
3
3
  maxframe/_utils.pxd,sha256=_qHN-lCY1FQgDFIrrqA79Ys0SBdonp9kXRMS93xKSYk,1187
4
4
  maxframe/_utils.pyx,sha256=_3p6aJEJ6WZYLcNZ6o4DxoxsxqadTlJXFlgDeFPxqUQ,17564
5
- maxframe/codegen.py,sha256=LzOBM2H5U24Eis3NA9tiy5dNe5xKsk7YaUizZKkkG44,14020
5
+ maxframe/codegen.py,sha256=S23hTTu2fIJTv5PMIItE_NPUnadUyoiMmb-1YAqbaWw,16329
6
6
  maxframe/conftest.py,sha256=JE9I-5mP4u-vgUqYL22mNY3tqpGofM8VMe8c8VUYkzk,4403
7
7
  maxframe/env.py,sha256=xY4wjMWIJ4qLsFAQ5F-X5CrVR7dDSWiryPXni0YSK5c,1435
8
8
  maxframe/errors.py,sha256=xBnvoJjjNcHVLhwj77Dux9ut8isGVmmJXFqefmmx8Ak,711
9
9
  maxframe/extension.py,sha256=o7yiS99LWTtLF7ZX6F78UUJAqUyd-LllOXA2l69np50,2455
10
10
  maxframe/mixin.py,sha256=QfX0KqVIWDlVDSFs0lwdzLexw7lS7W_IUuK7aY1Ib8c,3624
11
11
  maxframe/opcodes.py,sha256=L-BvSFEUOMrtGJFXIH6zk2Xv_-te5VZxv5eDDCNNt0U,10566
12
- maxframe/protocol.py,sha256=4JOFt8zU56d7BltDxksmuQaJPSG_f18Js4XS24SzPDo,14230
12
+ maxframe/protocol.py,sha256=N4i0ggLY131gwnxOrCgKeZwzhLKSRB171cx1lWRvUcw,14605
13
13
  maxframe/session.py,sha256=Mme-jB2hioJC6ttrXfX4XSeHLCMYoFR0ikmmhx82-vc,36624
14
14
  maxframe/typing_.py,sha256=pAgOhHHSM376N7PJLtNXvS5LHNYywz5dIjnA_hHRWSM,1133
15
15
  maxframe/udf.py,sha256=EFAAV2c8SpWKcF9_8Pocpjc4bXsEASf57Qy_Q30YH4Q,2315
16
- maxframe/utils.py,sha256=xbXViFjhzlAjvg_K7kHc0pxt2pSJJ_iSBlmCyXOWNAs,34644
16
+ maxframe/utils.py,sha256=CpA4Cqf5Lg7LMKJeJFsmybqVutcSh3sSqMhdHnIi0h4,35017
17
17
  maxframe/config/__init__.py,sha256=AHo3deaCm1JnbbRX_udboJEDYrYytdvivp9RFxJcumI,671
18
- maxframe/config/config.py,sha256=PYJgzQy3rjVDbLzfyn9vtU4Lf7cgcJgqO5qDmNVnMUc,13321
18
+ maxframe/config/config.py,sha256=Top-9hUEkPvaTlAGrP9GzGjwvZk6YPuHtwy4TEryKqg,13475
19
19
  maxframe/config/validators.py,sha256=pKnloh2kEOBRSsT8ks-zL8XVSaMMVIEvHvwNJlideeo,1672
20
20
  maxframe/config/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
21
21
  maxframe/config/tests/test_config.py,sha256=FWQZ6KBUG_jY1-KaR-GKXl7khhlTbuLlk3uaEV8koM8,2839
@@ -26,14 +26,14 @@ maxframe/core/mode.py,sha256=a-2AjLrIaqemN3pZPFhdfrPYXR7ryCLcsT1KJwWKPb0,3107
26
26
  maxframe/core/entity/__init__.py,sha256=tD4zo3KXpzLrQraHnIXeO1Q961lSsIqpbAGRK2WijVE,1336
27
27
  maxframe/core/entity/chunks.py,sha256=zKk8Iyc3IkakIDW1bMYq_zZNLrR4ZMdXH-mBuOiFerM,2202
28
28
  maxframe/core/entity/core.py,sha256=aFwjNMhTJ4ybr1WzmMVSTG211fzutzaATs14QoNh-JM,4170
29
- maxframe/core/entity/executable.py,sha256=Pt-ad4u2jqb9mCbOHp0DcL1Oo6kjUzF411ZxQrouqys,11239
29
+ maxframe/core/entity/executable.py,sha256=CKxFGvFPfY_8JBprhpyndhTSLgVLtUG4G5n7Dw0dHnw,11275
30
30
  maxframe/core/entity/fuse.py,sha256=X1lI0WXj5t0flgGI5-qlVl5LoYkAdLJHk2Vv767C9G4,2350
31
- maxframe/core/entity/objects.py,sha256=GST_N6nwNxUv1wXaPBWty7n8Asn2hX8wfIDjrctkZP8,3119
31
+ maxframe/core/entity/objects.py,sha256=Ys_l6cBp0HwgRmXuqYo4HsnjdbfUW4mgvek5W0IMmXY,3134
32
32
  maxframe/core/entity/output_types.py,sha256=NnNeDBVAEhD8dtPBWzpM7n6s8neVFrahjd0zMGWroCc,2735
33
33
  maxframe/core/entity/tileables.py,sha256=6jJyFscvb8sH5K_k2VaNGeUm8YrpevCtou3WSUl4Dw8,13973
34
34
  maxframe/core/entity/utils.py,sha256=454RYVbTMVW_8KnfDqUPec4kz1p98izVTC2OrzhOkao,966
35
35
  maxframe/core/graph/__init__.py,sha256=n1WiszgVu0VdXsk12oiAyggduNwu-1-9YKnfZqvmmXk,838
36
- maxframe/core/graph/core.cp39-win_amd64.pyd,sha256=5K_iDjXsk36IBCH6WyYsXFZYwN4NnsAt77AffVNtkpI,249344
36
+ maxframe/core/graph/core.cp39-win_amd64.pyd,sha256=ihFU0cpiDugw4-EfUAzPbk46z6Rutez4lL37kzEi8oQ,249344
37
37
  maxframe/core/graph/core.pyx,sha256=WYlYtXXSs72vfhf2ttJO-4u85exYzy2J9mlALHOMqoA,16354
38
38
  maxframe/core/graph/entity.py,sha256=RT_xbP5niUN5D6gqZ5Pg1vUegHn8bqPk8G8A30quOVA,5730
39
39
  maxframe/core/graph/builder/__init__.py,sha256=vTRY5xRPOMHUsK0jAtNIb1BjSPGqi_6lv86AroiiiL4,718
@@ -54,7 +54,7 @@ maxframe/core/operator/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH
54
54
  maxframe/core/operator/tests/test_core.py,sha256=iqZk4AWubFLO24V_VeV6SEy5xrzBFLP9qKK6tKO0SGs,1755
55
55
  maxframe/core/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
56
56
  maxframe/core/tests/test_mode.py,sha256=fyRH-ksa6MogEs6kNhtXhCZyvhYqflgaXJYI3nSo-ps,2507
57
- maxframe/dataframe/__init__.py,sha256=tDpskfQDhuP5IoaqPFPvuzZCYW_FXAdmnD48CDQ_ysQ,2166
57
+ maxframe/dataframe/__init__.py,sha256=t_Fnu2MNCgOr_N7mMqGU3LeZXvBF1yOHtLb5u1PoHt0,2187
58
58
  maxframe/dataframe/arrays.py,sha256=rOvhxMQars9E3SOYSu0ygBuuRVY0QV6xzengnMqKs4s,29616
59
59
  maxframe/dataframe/core.py,sha256=63TqgmOCTr5wtwbILJ6bLtHZQ6xnZd7LvVrMqR27GDQ,76034
60
60
  maxframe/dataframe/initializer.py,sha256=WW96yQjquofNFt6RPZvgWW4SBmH0OEDj8-BxpuyKThY,10552
@@ -119,12 +119,12 @@ maxframe/dataframe/datasource/from_records.py,sha256=ygpKOMXZnDdWzGxMxQ4KdGv-tJF
119
119
  maxframe/dataframe/datasource/from_tensor.py,sha256=mShHYi0fZcG7ZShFVgIezaphh8tSFqR9-nQMm5YKIhw,15146
120
120
  maxframe/dataframe/datasource/index.py,sha256=X_NShW67nYJGxaWp3qOrvyInNkz9L-XHjbApU4fHoes,4518
121
121
  maxframe/dataframe/datasource/read_csv.py,sha256=IvQihmpcZIdzSD7ziX92aTAHNyP5WnTgd2cZz_h43sQ,24668
122
- maxframe/dataframe/datasource/read_odps_query.py,sha256=BbIfSc1dOzauZQYLAIxKO-Z2VGZmBQ884UqSdalmG_o,10115
123
- maxframe/dataframe/datasource/read_odps_table.py,sha256=kZDpJvq3d39hLYa4oznk2LjJXipOUkfXadDhxPFSZbs,9267
122
+ maxframe/dataframe/datasource/read_odps_query.py,sha256=QKU_7R6dd6GXzNX4IN9uwu3jB17MfhTr2nUtx0LFSNk,10116
123
+ maxframe/dataframe/datasource/read_odps_table.py,sha256=ocSKQQ7SwIkzliFCFWNzy3e8J3CBZsf4971oRdlgdks,9294
124
124
  maxframe/dataframe/datasource/read_parquet.py,sha256=SZPrWoax2mwMBNvRk_3lkS72pZLe-_X_GwQ1JROBMs4,14952
125
125
  maxframe/dataframe/datasource/series.py,sha256=elQVupKETh-hUHI2fTu8TRxBE729Vyrmpjx17XlRV-8,1964
126
126
  maxframe/dataframe/datasource/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
127
- maxframe/dataframe/datasource/tests/test_datasource.py,sha256=mJc8z0dSDyT1nni-VTij6LLcu_sp8Cj0eekxga4Ritk,13999
127
+ maxframe/dataframe/datasource/tests/test_datasource.py,sha256=UumRBjE-bIuCi7Z4_3t8qb58ZcF8ePRZf3xF7DTvqIA,15041
128
128
  maxframe/dataframe/datastore/__init__.py,sha256=MmlHYvFacMReOHDQMXF-z2bCsLyrSHYBVwIlCsZGOK4,810
129
129
  maxframe/dataframe/datastore/to_csv.py,sha256=lheaF3ZmBPrcwcWyhK5gEVAGIaLJbvTyVAzqZFGG7eM,8026
130
130
  maxframe/dataframe/datastore/to_odps.py,sha256=Ml_iF9AspqIgGbeOAjTF3ukAwv-7SizribuqXZdxuXo,5776
@@ -135,10 +135,10 @@ maxframe/dataframe/extensions/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b
135
135
  maxframe/dataframe/extensions/tests/test_extensions.py,sha256=oDnVwQx-o8p4wmen8ZS3gnudOAihFAdNKQhSCqNNXzQ,1324
136
136
  maxframe/dataframe/fetch/__init__.py,sha256=W1arTCAjD0v_bdVeIzpJMnil3h0Ucsn29zFWmgZcYck,668
137
137
  maxframe/dataframe/fetch/core.py,sha256=27VANjpMm2rdCg1KPZxWZrKWNuNgSMzZrordI05mqWc,3424
138
- maxframe/dataframe/groupby/__init__.py,sha256=YjjKMh841TLcU-gik7n9fjeYNFwtYjvrHNq4zf83UBM,3415
138
+ maxframe/dataframe/groupby/__init__.py,sha256=wMjmvk4ced1uCm7bw0oodIKvaep61KhupriL9JRRq5w,3443
139
139
  maxframe/dataframe/groupby/aggregation.py,sha256=cUnu-Bj6YD1TVkaafwL2aGIIqixLEq7s9-7BQ_1T2DI,12303
140
140
  maxframe/dataframe/groupby/apply.py,sha256=DQHyEfqj-3tfK-CxwpdVgya0_YC9dImeWYPZJDw7ckk,9735
141
- maxframe/dataframe/groupby/core.py,sha256=BPcDrMN710phqwV1_F7kZKKan7ZqxoER-Jd4mXxMMyw,6154
141
+ maxframe/dataframe/groupby/core.py,sha256=NG6e3sqIu5dnBw9_DCQEDtsnxM5e4Yl1oD7Z_qjdtWA,6254
142
142
  maxframe/dataframe/groupby/cum.py,sha256=A7vIWLsb50VLu3yAngO-BfZecjWj0Fk6TZ5v4uQEAPM,3879
143
143
  maxframe/dataframe/groupby/fill.py,sha256=AXRmA_j-m7ig0udLCJ02FwIce2GLQ2U8KlnuCe-NY3U,4862
144
144
  maxframe/dataframe/groupby/getitem.py,sha256=owNzoE8UEfM41dfuntKkRBjjYYbY8O8CMJchIhCEyds,3344
@@ -283,7 +283,7 @@ maxframe/learn/contrib/pytorch/tests/test_pytorch.py,sha256=GHP-oD5uMU8LD90Jt2cH
283
283
  maxframe/lib/__init__.py,sha256=_PB28W40qku6YiT8fJYqdmEdRMQfelOwGeksCOZJfCc,657
284
284
  maxframe/lib/compression.py,sha256=QQpNK79iUC9zck74I0HKMhapSRnLBXtTRyS91taEVIc,1497
285
285
  maxframe/lib/functools_compat.py,sha256=2LTrkSw5i-z5E9XCtZzfg9-0vPrYxicKvDjnnNrAL1Q,2697
286
- maxframe/lib/mmh3.cp39-win_amd64.pyd,sha256=2cgKxEhMe8AxmIhYaKwe2c3mTz7dLWQ4vHPTAzGEEA4,17408
286
+ maxframe/lib/mmh3.cp39-win_amd64.pyd,sha256=sy3Uoiv0kFIwa-3jA-x5HvIpHgJdrcQi6zOKIBPP31A,17408
287
287
  maxframe/lib/version.py,sha256=VOVZu3KHS53YUsb_vQsT7AyHwcCWAgc-3bBqV5ANcbQ,18941
288
288
  maxframe/lib/wrapped_pickle.py,sha256=bzEaokhAZlkjXqw1xfeKO1KX2awhKIz_1RT81yPPoag,3949
289
289
  maxframe/lib/aio/__init__.py,sha256=xzIYnV42_7CYuDTTv8svscIXQeJMF0nn8AXMbpv173M,963
@@ -291,7 +291,7 @@ maxframe/lib/aio/_runners.py,sha256=zhDC92KxrYxLEufo5Hk8QU-mTVOxNL7IM9pZXas_nDg,
291
291
  maxframe/lib/aio/_threads.py,sha256=cDaEKg5STncq9QTPUUwehJ722vgueqBoB1C-NeoHN8E,1363
292
292
  maxframe/lib/aio/base.py,sha256=Ol0MnkcsBRfsQdZWceYfaWVtNOuiHzY8EYo2Zh0QFvM,2240
293
293
  maxframe/lib/aio/file.py,sha256=uy2LM_U8-Snpf45yZqUQRR_0hZT5UXZnwq0qENpMI6k,2097
294
- maxframe/lib/aio/isolation.py,sha256=zUqytRXUHRniUrZCRtsJkBldP-RnGWv3KpelQ3hyRdU,2723
294
+ maxframe/lib/aio/isolation.py,sha256=wIliQ1qFtGV_cZ4stE21QHKUVtq7j88m7ZuZF8Ye2iE,2861
295
295
  maxframe/lib/aio/lru.py,sha256=hZ0QY8VWhZr06B11YqjEONKcjySP7oKaa-p9evwnxZY,7133
296
296
  maxframe/lib/aio/parallelism.py,sha256=Q3dXir6wr5vG2SmVSz0n6BdH7d5mhMTohfeFs5JDTtU,1272
297
297
  maxframe/lib/aio/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
@@ -346,7 +346,7 @@ maxframe/remote/core.py,sha256=w_eTDEs0O7iIzLn1YrMGh2gcNAzzbqV0mx2bRT7su_U,7001
346
346
  maxframe/remote/run_script.py,sha256=k93-vaFLUanWoBRai4-78DX_SLeZ8_rbbxcCtOIXZO8,3677
347
347
  maxframe/serialization/__init__.py,sha256=nxxU7CI6MRcL3sjA1KmLkpTGKA3KG30FKl-MJJ0MCdI,947
348
348
  maxframe/serialization/arrow.py,sha256=OMeDjLcPgagqzokG7g3Vhwm6Xw1j-Kph1V2QsIwi6dw,3513
349
- maxframe/serialization/core.cp39-win_amd64.pyd,sha256=1do04vxT1TRzI-yR9cIIxoI3SFQ0V1ETTRKxHue-Rfw,393216
349
+ maxframe/serialization/core.cp39-win_amd64.pyd,sha256=v5hTr21JOIqdmz-tR5jJ5XFr-bymjyOxBdiUKjge25w,393216
350
350
  maxframe/serialization/core.pxd,sha256=Fymih3Wo-CrOY27_o_DRINdbRGR7mgiT-XCaXCXafxM,1347
351
351
  maxframe/serialization/core.pyx,sha256=Qmipu3LiJGIBVy_7d4tSJqcYWnG5xj2I7IaPv2PSq5E,35078
352
352
  maxframe/serialization/exception.py,sha256=e7bZyPlZ8XhSCdeOwlYreq0HazPXKOgOA6r9Q4Ecn2Y,3113
@@ -355,8 +355,8 @@ maxframe/serialization/numpy.py,sha256=ENrFKl24mtYyO1vZRLwHvMD0r4z_UI7J2-yNlmfWS
355
355
  maxframe/serialization/pandas.py,sha256=3aPzDOg9UYSI9GFpWm2aJc8EAi-d-timM8vQ8kTL3Cg,7349
356
356
  maxframe/serialization/scipy.py,sha256=fGwQ5ZreymrMT8g7TneATfFdKFF7YPNZQqgWgMa3J8M,2498
357
357
  maxframe/serialization/serializables/__init__.py,sha256=rlQhIaSAVzz4KYkc5shEHFZDPd6WDMPkxalU76yjJ3M,1406
358
- maxframe/serialization/serializables/core.py,sha256=9QlrQIRWSf0Ocs3YB4_n62v03RAkrb1rGN8AFCuzumI,9053
359
- maxframe/serialization/serializables/field.py,sha256=Rv9qFKCE0G9TSJXo1VU8mYu_OLakFsiZHC0enDrKIBs,16540
358
+ maxframe/serialization/serializables/core.py,sha256=QUjHQPG_Qd5yh_bW-mCdapY5uKUl-s1axrM2N3eomyQ,9227
359
+ maxframe/serialization/serializables/field.py,sha256=DVott3HAbne4UvN-heSFS9gSl0wCxV5RssS738FCjzk,16639
360
360
  maxframe/serialization/serializables/field_type.py,sha256=hkxrXT2SL_tATuobtJDfL4DzzVP2hJjDlC3PrJg6ZKo,15454
361
361
  maxframe/serialization/serializables/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
362
362
  maxframe/serialization/serializables/tests/test_field_type.py,sha256=uG87-bdG8xGmjrubEHCww1ZKmRupSvnNKnZoV2SnwYM,4502
@@ -365,7 +365,7 @@ maxframe/serialization/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH
365
365
  maxframe/serialization/tests/test_serial.py,sha256=9G2CbPBHINwcZ038pRwZON_OtH-JVXZ8w66BLYWP578,12923
366
366
  maxframe/tensor/__init__.py,sha256=aomZCK-bt5OYyRLGcbACxtFLrWIp14F4R3P79zwbN5E,3694
367
367
  maxframe/tensor/array_utils.py,sha256=xr_Ng-4dETJFjsMfWi5gbTPM9mRmPvRWj8QY2WKjmCg,5129
368
- maxframe/tensor/core.py,sha256=u9HPbYj1gOF4m7WJpjSCprIHR-3HEEwK_LhjPNmro7g,22644
368
+ maxframe/tensor/core.py,sha256=-G-UzY81GTKj2SD9FQLqBg-UDod5LjjrEA-uF16ofms,22638
369
369
  maxframe/tensor/operators.py,sha256=8VsSZ8OcImGkSRQvrYlV05KMHGsroAYmW1o9RM2yV1U,3584
370
370
  maxframe/tensor/utils.py,sha256=An35s6MrbltYvN8WYzjKCjyozTDbGQrvUW_qz8KnA94,23632
371
371
  maxframe/tensor/arithmetic/__init__.py,sha256=SUlcG0Mf9ddgxAdydenuJ9eY5yVu0TgKfpBujI3OX4w,9695
@@ -603,26 +603,27 @@ maxframe/tensor/statistics/quantile.py,sha256=HrPxQoRXTEGf-5m79osqhUoSTFpWmIwmhO
603
603
  maxframe/tensor/ufunc/__init__.py,sha256=8QUi-cPvvbsD7i7LOeZ9sc0v1XXd7lt-XV5pQKbVZJs,821
604
604
  maxframe/tensor/ufunc/ufunc.py,sha256=XRtGlhdrW7H--mrc8fTBOlUP0mzKpd9tdRtCuLDymtc,7383
605
605
  maxframe/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
606
- maxframe/tests/test_protocol.py,sha256=n7LSC42JtGQuCRt0XY8sd9ewf6i_1C0LE3ux9yNMGp8,4993
606
+ maxframe/tests/test_codegen.py,sha256=h3TKqP4zghxTn1twH7gR9jOe6NKXdCC1B4u0chlUrpY,2277
607
+ maxframe/tests/test_protocol.py,sha256=IgZT2CBH1dv6V1DwSq-PjvrUhvtOf8Mab6dnWhAT3No,5331
607
608
  maxframe/tests/test_utils.py,sha256=0Iey3O6zrGI1yQU2OSpWavJNvhUjrmdkct4-27tkGUM,12353
608
- maxframe/tests/utils.py,sha256=ulCRwCjaJeoMo4NwvekJq5BhEJzzr9sdTn-cOx9HG_A,4697
609
+ maxframe/tests/utils.py,sha256=gCre-8BApU4-AEun9WShm4Ff5a9a_oKxvLNneESXBjU,4732
609
610
  maxframe_client/__init__.py,sha256=xqlN69LjvAp2bNCaT9d82U9AF5WKi_c4UOheEW1wV9E,741
610
611
  maxframe_client/conftest.py,sha256=UWWMYjmohHL13hLl4adb0gZPLRdBVOYVvsFo6VZruI0,658
611
612
  maxframe_client/fetcher.py,sha256=ajt14PYVEXKShXSx-qTL_8adMCXBEsZoOhHr3yx95-M,7015
612
613
  maxframe_client/clients/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
613
- maxframe_client/clients/framedriver.py,sha256=_Hv_pcV0Rmc3v8sNLzrZyTYz5AyoavDPOs8jtotFQWU,4570
614
+ maxframe_client/clients/framedriver.py,sha256=upN6C1eZrCpLTsS6fihWOMy392psWfo0bw2XgSLI_Yg,4581
614
615
  maxframe_client/clients/spe.py,sha256=uizNBejhU_FrMhsgsFgDnq7gL7Cxk803LeLYmr3nmxs,3697
615
616
  maxframe_client/session/__init__.py,sha256=9zFCd3zkSADESAFc4SPoQ2nkvRwsIhhpNNO2TtSaWbU,854
616
617
  maxframe_client/session/consts.py,sha256=nD-D0zHXumbQI8w3aUyltJS59K5ftipf3xCtHNLmtc8,1380
617
618
  maxframe_client/session/graph.py,sha256=GSZaJ-PV4DK8bTcNtoSoY5kDTyyIRAKleh4tOCSUbsI,4470
618
619
  maxframe_client/session/odps.py,sha256=RG7_28UaS_8tgJUOa4ohw2QtwX2fF4yqsGGy2MksQWI,16700
619
- maxframe_client/session/task.py,sha256=pJYSdOo-nTXTz7An0u2vZop1MtQKMhAAhiT2m8ilk38,11142
620
+ maxframe_client/session/task.py,sha256=R8x8OERIb673vTq-o0ig6Zy2NT4_jvi8AbLhyMaljo8,11409
620
621
  maxframe_client/session/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
621
- maxframe_client/session/tests/test_task.py,sha256=lm2q1tWZiaYeU0D11_f_t7acZLLi1Ts-nlIFpZHsm4U,2684
622
+ maxframe_client/session/tests/test_task.py,sha256=861usEURVXeTUzfJYZmBfwsHfZFexG23mMtT5IJOOm4,3364
622
623
  maxframe_client/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
623
624
  maxframe_client/tests/test_fetcher.py,sha256=7iYXLMIoCJLfgUkjB2HBkV-sqQ-xGlhtzfp9hRJz_kM,3605
624
- maxframe_client/tests/test_session.py,sha256=vTycMT6IMtPQcpVFCXFWLniHXj6m03JsSciJ7x_kAbU,6443
625
- maxframe-0.1.0b2.dist-info/METADATA,sha256=FLBUoJqc372CTgONZ6NrajzjXo3jFdUiVp1ZQH4HSXk,3147
626
- maxframe-0.1.0b2.dist-info/WHEEL,sha256=Z6c-bE0pUM47a70GvqO_SvH_XXU0lm62gEAKtoNJ08A,100
627
- maxframe-0.1.0b2.dist-info/top_level.txt,sha256=64x-fc2q59c_vXwNUkehyjF1vb8JWqFSdYmUqIFqoTM,31
628
- maxframe-0.1.0b2.dist-info/RECORD,,
625
+ maxframe_client/tests/test_session.py,sha256=s8pxf0I6PjOd6ZZQ4IYjfOM_3F3wf6SGPRMz0tAZFmo,6514
626
+ maxframe-0.1.0b3.dist-info/METADATA,sha256=aZIVKoKmfRSjIExk2K16jB0fJj5iApw9FhoHqv6hs_k,3147
627
+ maxframe-0.1.0b3.dist-info/WHEEL,sha256=Z6c-bE0pUM47a70GvqO_SvH_XXU0lm62gEAKtoNJ08A,100
628
+ maxframe-0.1.0b3.dist-info/top_level.txt,sha256=64x-fc2q59c_vXwNUkehyjF1vb8JWqFSdYmUqIFqoTM,31
629
+ maxframe-0.1.0b3.dist-info/RECORD,,
@@ -14,6 +14,7 @@
14
14
 
15
15
  from typing import Any, Dict, List
16
16
 
17
+ import msgpack
17
18
  from tornado import httpclient
18
19
 
19
20
  from maxframe.core import TileableGraph
@@ -28,7 +29,6 @@ from maxframe.protocol import (
28
29
  )
29
30
  from maxframe.typing_ import TimeoutType
30
31
  from maxframe.utils import (
31
- deserialize_serializable,
32
32
  format_timeout_params,
33
33
  serialize_serializable,
34
34
  wait_http_response,
@@ -47,12 +47,12 @@ class FrameDriverClient:
47
47
  resp = await httpclient.AsyncHTTPClient().fetch(
48
48
  req_url, method="POST", body=serialize_serializable(req_body)
49
49
  )
50
- return deserialize_serializable(resp.body).body
50
+ return SessionInfo.from_json(msgpack.loads(resp.body))
51
51
 
52
52
  async def get_session(self, session_id: str) -> SessionInfo:
53
53
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
54
54
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
55
- return deserialize_serializable(resp.body).body
55
+ return SessionInfo.from_json(msgpack.loads(resp.body))
56
56
 
57
57
  async def delete_session(self, session_id: str):
58
58
  req_url = f"{self._endpoint}/api/sessions/{session_id}"
@@ -71,12 +71,12 @@ class FrameDriverClient:
71
71
  method="POST",
72
72
  body=serialize_serializable(ProtocolBody(body=req_body)),
73
73
  )
74
- return deserialize_serializable(resp.body).body
74
+ return DagInfo.from_json(msgpack.loads(resp.body))
75
75
 
76
76
  async def get_dag_info(self, session_id: str, dag_id: str) -> DagInfo:
77
77
  req_url = f"{self._endpoint}/api/sessions/{session_id}/dags/{dag_id}"
78
78
  resp = await httpclient.AsyncHTTPClient().fetch(req_url, method="GET")
79
- return deserialize_serializable(resp.body).body
79
+ return DagInfo.from_json(msgpack.loads(resp.body))
80
80
 
81
81
  async def wait_dag(self, session_id: str, dag_id: str, timeout: TimeoutType = None):
82
82
  query_part = format_timeout_params(timeout)
@@ -87,7 +87,7 @@ class FrameDriverClient:
87
87
  resp = await wait_http_response(
88
88
  req_url, method="GET", request_timeout=timeout
89
89
  )
90
- info = deserialize_serializable(resp.body).body
90
+ info = DagInfo.from_json(msgpack.loads(resp.body))
91
91
  except TimeoutError:
92
92
  info = await self.get_dag_info(session_id, dag_id)
93
93
  return info
@@ -103,7 +103,7 @@ class FrameDriverClient:
103
103
  resp = await wait_http_response(
104
104
  req_url, method="DELETE", request_timeout=timeout
105
105
  )
106
- info = deserialize_serializable(resp.body).body
106
+ info = DagInfo.from_json(msgpack.loads(resp.body))
107
107
  except TimeoutError:
108
108
  info = await self.get_dag_info(session_id, dag_id)
109
109
  return info
@@ -112,13 +112,19 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
112
112
  odps_entry: ODPS,
113
113
  task_name: Optional[str] = None,
114
114
  project: Optional[str] = None,
115
- priority: Optional[str] = None,
115
+ priority: Optional[int] = None,
116
116
  running_cluster: Optional[str] = None,
117
117
  nested_instance_id: Optional[str] = None,
118
118
  major_version: Optional[str] = None,
119
119
  output_format: Optional[str] = None,
120
120
  **kwargs,
121
121
  ):
122
+ if callable(odps_options.get_priority):
123
+ default_priority = odps_options.get_priority(odps_entry)
124
+ else:
125
+ default_priority = odps_options.priority
126
+ priority = priority if priority is not None else default_priority
127
+
122
128
  self._odps_entry = odps_entry
123
129
  self._task_name = task_name
124
130
  self._project = project
@@ -126,6 +132,7 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
126
132
  self._running_cluster = running_cluster
127
133
  self._major_version = major_version
128
134
  self._output_format = output_format or MAXFRAME_OUTPUT_MSGPACK_FORMAT
135
+
129
136
  if nested_instance_id is None:
130
137
  self._nested = False
131
138
  self._instance = None
@@ -18,6 +18,7 @@ import os
18
18
  import mock
19
19
  from defusedxml import ElementTree
20
20
  from odps import ODPS
21
+ from odps import options as odps_options
21
22
 
22
23
  from ...session.consts import MAXFRAME_OUTPUT_JSON_FORMAT
23
24
  from ...session.task import MaxFrameInstanceCaller, MaxFrameTask
@@ -27,17 +28,20 @@ expected_file_dir = os.path.join(os.path.dirname(__file__), "expected-data")
27
28
 
28
29
  def test_maxframe_instance_caller_creating_session():
29
30
  o = ODPS.from_environments()
30
- task_caller = MaxFrameInstanceCaller(
31
- odps_entry=o,
32
- task_name="task_test",
33
- major_version="test_version",
34
- output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
35
- priority="100",
36
- running_cluster="test_cluster",
37
- )
31
+
32
+ def create_caller(**kwargs):
33
+ kw = dict(
34
+ odps_entry=o,
35
+ task_name="task_test",
36
+ major_version="test_version",
37
+ output_format=MAXFRAME_OUTPUT_JSON_FORMAT,
38
+ running_cluster="test_cluster",
39
+ )
40
+ kw.update(**kwargs)
41
+ return MaxFrameInstanceCaller(**kw)
38
42
 
39
43
  def mock_create(self, task: MaxFrameTask, priority=None, running_cluster=None):
40
- assert priority == "100"
44
+ assert priority == 100
41
45
  assert running_cluster == "test_cluster"
42
46
  root = ElementTree.parse(
43
47
  os.path.join(expected_file_dir, "create_session.xml")
@@ -62,6 +66,20 @@ def test_maxframe_instance_caller_creating_session():
62
66
  target="maxframe_client.session.task.MaxFrameInstanceCaller",
63
67
  _wait_instance_task_ready=mock.DEFAULT,
64
68
  get_session=mock.DEFAULT,
65
- ):
66
- with mock.patch("odps.models.instances.BaseInstances.create", mock_create):
69
+ ), mock.patch("odps.models.instances.BaseInstances.create", mock_create):
70
+ task_caller = create_caller(priority=100)
71
+ task_caller.create_session()
72
+
73
+ old_priority = odps_options.priority
74
+ old_get_priority = odps_options.get_priority
75
+ try:
76
+ task_caller = create_caller(priority=100)
77
+ odps_options.priority = 100
78
+ task_caller.create_session()
79
+
80
+ odps_options.priority = None
81
+ odps_options.get_priority = lambda _: 100
67
82
  task_caller.create_session()
83
+ finally:
84
+ odps_options.priority = old_priority
85
+ odps_options.get_priority = old_get_priority
@@ -24,6 +24,7 @@ from odps import ODPS
24
24
  import maxframe.dataframe as md
25
25
  import maxframe.remote as mr
26
26
  from maxframe.core import ExecutableTuple, TileableGraph
27
+ from maxframe.lib.aio import stop_isolation
27
28
  from maxframe.protocol import ResultInfo
28
29
  from maxframe.serialization import RemoteException
29
30
  from maxframe.session import new_session
@@ -52,6 +53,7 @@ def start_mock_session(framedriver_app): # noqa: F811
52
53
  time.sleep(5) # Wait for temp table deleted
53
54
  else:
54
55
  session.reset_default()
56
+ stop_isolation()
55
57
 
56
58
 
57
59
  def test_simple_run_dataframe(start_mock_session):