maxframe 0.1.0b1-cp38-cp38-win_amd64.whl → 0.1.0b3-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of maxframe might be problematic.
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/codegen.py +88 -19
- maxframe/config/config.py +10 -0
- maxframe/core/entity/executable.py +1 -0
- maxframe/core/entity/objects.py +3 -2
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +2 -2
- maxframe/core/operator/base.py +14 -0
- maxframe/dataframe/__init__.py +3 -1
- maxframe/dataframe/datasource/from_records.py +4 -0
- maxframe/dataframe/datasource/read_odps_query.py +295 -0
- maxframe/dataframe/datasource/read_odps_table.py +1 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
- maxframe/dataframe/groupby/__init__.py +4 -0
- maxframe/dataframe/groupby/core.py +5 -0
- maxframe/dataframe/misc/to_numeric.py +4 -0
- maxframe/dataframe/window/aggregation.py +1 -24
- maxframe/dataframe/window/ewm.py +0 -7
- maxframe/dataframe/window/tests/test_ewm.py +0 -6
- maxframe/errors.py +21 -0
- maxframe/lib/aio/isolation.py +6 -1
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/opcodes.py +1 -0
- maxframe/protocol.py +25 -5
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/exception.py +2 -1
- maxframe/serialization/serializables/core.py +6 -1
- maxframe/serialization/serializables/field.py +2 -0
- maxframe/tensor/core.py +3 -3
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +16 -8
- maxframe/tests/utils.py +1 -0
- maxframe/udf.py +15 -16
- maxframe/utils.py +21 -1
- {maxframe-0.1.0b1.dist-info → maxframe-0.1.0b3.dist-info}/METADATA +1 -74
- {maxframe-0.1.0b1.dist-info → maxframe-0.1.0b3.dist-info}/RECORD +42 -39
- {maxframe-0.1.0b1.dist-info → maxframe-0.1.0b3.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +7 -7
- maxframe_client/session/task.py +31 -3
- maxframe_client/session/tests/test_task.py +29 -11
- maxframe_client/tests/test_session.py +2 -0
- {maxframe-0.1.0b1.dist-info → maxframe-0.1.0b3.dist-info}/top_level.txt +0 -0
maxframe/dataframe/datasource/tests/test_datasource.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from collections import OrderedDict
 
 import numpy as np
@@ -22,7 +23,7 @@ from odps import ODPS
 from .... import tensor as mt
 from ....tests.utils import tn
 from ....utils import lazy_import
-from ... import read_odps_table
+from ... import read_odps_query, read_odps_table
 from ...core import DatetimeIndex, Float64Index, IndexValue, Int64Index, MultiIndex
 from ..dataframe import from_pandas as from_pandas_df
 from ..date_range import date_range
@@ -33,6 +34,7 @@ from ..from_tensor import (
 )
 from ..index import from_pandas as from_pandas_index
 from ..index import from_tileable
+from ..read_odps_query import ColumnSchema, _resolve_task_sector
 from ..series import from_pandas as from_pandas_series
 
 ray = lazy_import("ray")
@@ -228,6 +230,7 @@ def test_from_odps_table():
     assert df.op.table_name == test_table.full_table_name
     assert df.index_value.name is None
     assert isinstance(df.index_value.value, IndexValue.RangeIndex)
+    assert df.op.get_columns() == ["col1", "col2", "col3"]
     pd.testing.assert_series_equal(
         df.dtypes,
         pd.Series(
@@ -247,6 +250,7 @@ def test_from_odps_table():
     assert df.op.table_name == test_table.full_table_name
     assert df.index_value.name is None
     assert isinstance(df.index_value.value, IndexValue.RangeIndex)
+    assert df.op.get_columns() == ["col1", "col2"]
     pd.testing.assert_series_equal(
         df.dtypes,
         pd.Series([np.dtype("O"), np.dtype("int64")], index=["col1", "col2"]),
@@ -257,6 +261,7 @@ def test_from_odps_table():
     assert df.index_value.name == "col1"
     assert isinstance(df.index_value.value, IndexValue.Index)
     assert df.index.dtype == np.dtype("O")
+    assert df.op.get_columns() == ["col2", "col3"]
     pd.testing.assert_series_equal(
         df.dtypes,
         pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
@@ -267,6 +272,7 @@ def test_from_odps_table():
 
     df = read_odps_table(test_parted_table, append_partitions=True)
     assert df.op.append_partitions is True
+    assert df.op.get_columns() == ["col1", "col2", "col3", "pt"]
     pd.testing.assert_series_equal(
         df.dtypes,
         pd.Series(
@@ -280,6 +286,7 @@ def test_from_odps_table():
     )
     assert df.op.append_partitions is True
     assert df.op.partitions == ["pt=20240103"]
+    assert df.op.get_columns() == ["col1", "col2", "pt"]
     pd.testing.assert_series_equal(
         df.dtypes,
         pd.Series(
@@ -292,6 +299,67 @@ def test_from_odps_table():
     test_parted_table.drop()
 
 
+def test_from_odps_query():
+    odps_entry = ODPS.from_environments()
+    table1_name = tn("test_from_odps_query_src1")
+    table2_name = tn("test_from_odps_query_src2")
+    odps_entry.delete_table(table1_name, if_exists=True)
+    odps_entry.delete_table(table2_name, if_exists=True)
+    test_table = odps_entry.create_table(
+        table1_name, "col1 string, col2 bigint, col3 double", lifecycle=1
+    )
+    # need some data to produce complicated plans
+    odps_entry.write_table(test_table, [["A", 10, 3.5]])
+    test_table2 = odps_entry.create_table(
+        table2_name, "col1 string, col2 bigint, col3 double", lifecycle=1
+    )
+    odps_entry.write_table(test_table2, [["A", 10, 4.5]])
+
+    with pytest.raises(ValueError) as err_info:
+        read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
+    assert "instant query" in err_info.value.args[0]
+
+    query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
+    df = read_odps_query(query1)
+    assert df.op.query == query1
+    assert df.index_value.name is None
+    assert isinstance(df.index_value.value, IndexValue.RangeIndex)
+    pd.testing.assert_series_equal(
+        df.dtypes,
+        pd.Series(
+            [np.dtype("O"), np.dtype("int64"), np.dtype("float64")],
+            index=["col1", "col2", "col3"],
+        ),
+    )
+
+    df = read_odps_query(query1, index_col="col1")
+    assert df.op.query == query1
+    assert df.index_value.name == "col1"
+    assert isinstance(df.index_value.value, IndexValue.Index)
+    pd.testing.assert_series_equal(
+        df.dtypes,
+        pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
+    )
+
+    query2 = (
+        f"SELECT t1.col1, t1.col2, t1.col3 as c31, t2.col3 as c32 "
+        f"FROM {table1_name} t1 "
+        f"INNER JOIN {table2_name} t2 "
+        f"ON t1.col1 = t2.col1 AND t1.col2 = t2.col2"
+    )
+    df = read_odps_query(query2, index_col=["col1", "col2"])
+    assert df.op.query == query2
+    assert df.index_value.names == ["col1", "col2"]
+    assert isinstance(df.index_value.value, IndexValue.MultiIndex)
+    pd.testing.assert_series_equal(
+        df.dtypes,
+        pd.Series([np.dtype("float64"), np.dtype("float64")], index=["c31", "c32"]),
+    )
+
+    test_table.drop()
+    test_table2.drop()
+
+
 def test_date_range():
     with pytest.raises(TypeError):
         _ = date_range("2020-1-1", periods="2")
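The test above exercises the new `read_odps_query` entry point end to end: it rejects non-instant statements such as CREATE TABLE, and it derives dtypes and index metadata from the schema MaxCompute infers for a SELECT. A minimal client-side sketch based only on the calls shown in the test (the table name is a placeholder, and the usual maxframe execute/fetch session flow is assumed):

    import maxframe.dataframe as md

    # Build a lazy DataFrame from a SQL query; nothing runs yet.
    df = md.read_odps_query(
        "SELECT col1, col2 FROM my_project.my_table WHERE col2 > 10",
        index_col="col1",  # promoted to the DataFrame index, as in the test
    )
    result = df.execute().fetch()  # submits the DAG to MaxCompute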
@@ -316,3 +384,18 @@
     assert dr.index_value.is_unique == expected.is_unique
     assert dr.index_value.is_monotonic_increasing == expected.is_monotonic_increasing
     assert dr.name == expected.name
+
+
+def test_resolve_task_sector():
+    input_path = os.path.join(os.path.dirname(__file__), "test-data", "task-input.txt")
+    with open(input_path, "r") as f:
+        sector = f.read()
+    actual_sector = _resolve_task_sector("job0", sector)
+
+    assert actual_sector.job_name == "job0"
+    assert actual_sector.task_name == "M1"
+    assert actual_sector.output_target == "Screen"
+    assert len(actual_sector.schema) == 78
+    assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
+    assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
+    assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
maxframe/dataframe/groupby/__init__.py CHANGED
@@ -14,6 +14,7 @@
 
 # noinspection PyUnresolvedReferences
 from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
+from .core import NamedAgg
 
 
 def _install():
@@ -25,6 +26,7 @@ def _install():
     from .fill import bfill, ffill, fillna
     from .getitem import df_groupby_getitem
     from .head import head
+    from .sample import groupby_sample
     from .transform import groupby_transform
 
     for cls in DATAFRAME_TYPE:
@@ -65,6 +67,8 @@ def _install():
 
         setattr(cls, "head", head)
 
+        setattr(cls, "sample", groupby_sample)
+
         setattr(cls, "ffill", ffill)
         setattr(cls, "bfill", bfill)
         setattr(cls, "backfill", bfill)
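This registers `groupby_sample` as a `sample` method on groupby objects. A sketch of how it would be called, assuming a pandas-compatible `GroupBy.sample` signature:

    import pandas as pd
    import maxframe.dataframe as md

    df = md.DataFrame(pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]}))
    # Draw one row per group, mirroring pandas' GroupBy.sample
    sampled = df.groupby("key").sample(n=1)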
maxframe/dataframe/groupby/core.py CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections import namedtuple
+
 import pandas as pd
 
 from ... import opcodes
@@ -30,6 +32,9 @@ _GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)
 _default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
 
 
+NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
+
+
 class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
     _op_type_ = opcodes.GROUPBY
 
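`NamedAgg` is a plain namedtuple with `column` and `aggfunc` fields, matching the shape of `pandas.NamedAgg`. Assuming the aggregation path accepts it the same way pandas does, named aggregation would look like:

    import pandas as pd
    import maxframe.dataframe as md
    from maxframe.dataframe.groupby import NamedAgg

    df = md.DataFrame(pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]}))
    out = df.groupby("key").agg(
        val_max=NamedAgg(column="val", aggfunc="max"),
        val_sum=NamedAgg(column="val", aggfunc="sum"),
    )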
maxframe/dataframe/misc/to_numeric.py CHANGED
@@ -29,6 +29,10 @@ class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
     def __init__(self, errors="raise", downcast=None, **kw):
         super().__init__(errors=errors, downcast=downcast, **kw)
 
+    @property
+    def input(self):
+        return self.inputs[0]
+
     def __call__(self, arg):
         if isinstance(arg, pd.Series):
             arg = asseries(arg)
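The added `input` property exposes the operator's first input tileable, a convention used by other maxframe operators. At the user level the operator backs the usual conversion call; a sketch, assuming `to_numeric` and `Series` are exported from maxframe.dataframe like their pandas counterparts:

    import maxframe.dataframe as md

    s = md.Series(["1", "2", "3.5"])
    converted = md.to_numeric(s)  # deferred; casts to numeric dtype on execute()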
maxframe/dataframe/window/aggregation.py CHANGED
@@ -18,14 +18,7 @@ from collections.abc import Iterable
 import numpy as np
 import pandas as pd
 
-from ...serialization.serializables import (
-    AnyField,
-    BoolField,
-    DictField,
-    Int32Field,
-    Int64Field,
-    StringField,
-)
+from ...serialization.serializables import AnyField, BoolField, Int32Field, Int64Field
 from ..core import DATAFRAME_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_df, build_empty_series, parse_index
@@ -41,22 +34,6 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
     # True if function name is treated as new index
     append_index = BoolField("append_index", default=None)
 
-    # chunk params
-    output_agg = BoolField("output_agg", default=None)
-
-    map_groups = DictField("map_groups", default=None)
-    map_sources = DictField("map_sources", default=None)
-    combine_sources = DictField("combine_sources", default=None)
-    combine_columns = DictField("combine_columns", default=None)
-    combine_funcs = DictField("combine_funcs", default=None)
-    key_to_funcs = DictField("keys_to_funcs", default=None)
-
-    min_periods_func_name = StringField("min_periods_func_name", default=None)
-
-    @property
-    def output_limit(self):
-        return 2 if self.output_agg else 1
-
     def __call__(self, expanding):
         inp = expanding.input
         raw_func = self.func
maxframe/dataframe/window/ewm.py CHANGED
@@ -233,13 +233,6 @@ def ewm(
     if alpha <= 0 or alpha > 1:
         raise ValueError("alpha must satisfy: 0 < alpha <= 1")
 
-    if not adjust and not ignore_na:
-        raise NotImplementedError(
-            "adjust == False when ignore_na == False not implemented"
-        )
-    if axis == 1:
-        raise NotImplementedError("axis other than 0 is not supported")
-
     if alpha == 1:
         return obj.expanding(min_periods=min_periods, axis=axis)
 
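With the two NotImplementedError guards removed, the parameter combinations they rejected in 0.1.0b1 are now accepted at construction time. A sketch mirroring the calls from the removed tests below (assuming the backend now supports these combinations rather than failing later):

    import numpy as np
    import pandas as pd
    import maxframe.dataframe as md

    df2 = md.DataFrame(pd.DataFrame(np.random.rand(4, 3), columns=list("abc")))
    # Both calls raised NotImplementedError in 0.1.0b1
    r1 = df2.ewm(2, adjust=False, ignore_na=False).agg("mean")
    r2 = df2.ewm(span=3, axis=1).agg("mean")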
maxframe/dataframe/window/tests/test_ewm.py CHANGED
@@ -23,9 +23,6 @@ def test_ewm():
     df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
     df2 = md.DataFrame(df)
 
-    with pytest.raises(NotImplementedError):
-        _ = df2.ewm(2, adjust=False, ignore_na=False)
-
     with pytest.raises(ValueError):
         _ = df2.ewm()
 
@@ -59,9 +56,6 @@ def test_ewm_agg():
     df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
     df2 = md.DataFrame(df, chunk_size=3)
 
-    with pytest.raises(NotImplementedError):
-        _ = df2.ewm(span=3, axis=1).agg("mean")
-
     r = df2.ewm(span=3).agg("mean")
     expected = df.ewm(span=3).agg("mean")
 
maxframe/errors.py ADDED
@@ -0,0 +1,21 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class MaxFrameError(Exception):
+    pass
+
+
+class MaxFrameUserError(MaxFrameError):
+    pass
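This new module gives MaxFrame a common exception root; `RemoteException` in maxframe/serialization/exception.py is rebased onto it later in this diff, so one handler can catch all framework errors. A sketch (`run_dag` is a hypothetical operation for illustration):

    from maxframe.errors import MaxFrameError

    try:
        run_dag()  # any operation that may fail inside MaxFrame
    except MaxFrameError as exc:
        # catches MaxFrameUserError, RemoteException and other subclasses
        print(f"MaxFrame operation failed: {exc}")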
maxframe/lib/aio/isolation.py CHANGED
@@ -14,11 +14,14 @@
 
 import asyncio
 import atexit
+import itertools
 import threading
 from typing import Dict, Optional
 
 
 class Isolation:
+    _counter = itertools.count().__next__
+
     loop: asyncio.AbstractEventLoop
     _stopped: Optional[asyncio.Event]
     _thread: Optional[threading.Thread]
@@ -38,7 +41,9 @@ class Isolation:
 
     def start(self):
         if self._threaded:
-            self._thread = thread = threading.Thread(target=self._run)
+            self._thread = thread = threading.Thread(
+                name=f"IsolationThread-{self._counter()}", target=self._run
+            )
             thread.daemon = True
             thread.start()
             self._thread_ident = thread.ident

maxframe/lib/mmh3.cp38-win_amd64.pyd CHANGED
Binary file
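Naming each isolation thread from a class-level `itertools.count()` makes threads distinguishable in debuggers and thread dumps. The pattern in isolation (a generic `Worker` class is used here for illustration):

    import itertools
    import threading

    class Worker:
        # one counter shared by the class; each call returns the next integer
        _counter = itertools.count().__next__

        def start(self):
            thread = threading.Thread(
                name=f"WorkerThread-{self._counter()}", target=self._run
            )
            thread.daemon = True
            thread.start()

        def _run(self):
            pass  # placeholder body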
maxframe/opcodes.py CHANGED
maxframe/protocol.py CHANGED
@@ -46,6 +46,8 @@ BodyType = TypeVar("BodyType", bound="Serializable")
 
 
 class JsonSerializable(Serializable):
+    _ignore_non_existing_keys = True
+
     @classmethod
     def from_json(cls, serialized: dict) -> "JsonSerializable":
         raise NotImplementedError
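Setting `_ignore_non_existing_keys = True` on `JsonSerializable` (backed by the `Serializable.__init__` change later in this diff) makes unknown keys in a payload be dropped instead of raising `KeyError`, so older clients can read responses from newer servers. The behaviour is pinned down by the `non_existing_field` case in test_protocol.py; sketched directly on a constructor:

    # Unknown keyword arguments are silently ignored on JsonSerializable
    # subclasses such as SessionInfo, instead of raising KeyError.
    info = SessionInfo(
        session_id="s1",
        non_existing_field="added by a newer server",  # dropped, no error
    )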
@@ -209,7 +211,10 @@ class ErrorInfo(JsonSerializable):
             kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
         if kw.get("raw_error_data"):
             bufs = [base64.b64decode(s) for s in kw["raw_error_data"]]
-            kw["raw_error_data"] = pickle.loads(bufs[0], buffers=bufs[1:])
+            try:
+                kw["raw_error_data"] = pickle.loads(bufs[0], buffers=bufs[1:])
+            except:
+                kw["raw_error_data"] = None
         return cls(**kw)
 
     def to_json(self) -> dict:
@@ -242,6 +247,8 @@ class DagInfo(JsonSerializable):
         default_factory=dict,
     )
     error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
+    start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
+    end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
 
     @classmethod
     def from_json(cls, serialized: dict) -> "DagInfo":
@@ -262,7 +269,10 @@ class DagInfo(JsonSerializable):
             "dag_id": self.dag_id,
             "status": self.status.value,
             "progress": self.progress,
+            "start_timestamp": self.start_timestamp,
+            "end_timestamp": self.end_timestamp,
         }
+        ret = {k: v for k, v in ret.items() if v is not None}
         if self.tileable_to_result_infos:
             ret["tileable_to_result_infos"] = {
                 k: v.to_json() for k, v in self.tileable_to_result_infos.items()
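With `start_timestamp` and `end_timestamp` carried on `DagInfo` (and stripped from the JSON when unset by the `None` filter above), a client can derive wall-clock duration. A sketch, assuming both fields hold epoch seconds and using a hypothetical accessor name:

    dag_info = session.get_dag_info(dag_id)  # hypothetical accessor
    if dag_info.start_timestamp and dag_info.end_timestamp:
        duration = dag_info.end_timestamp - dag_info.start_timestamp
        print(f"DAG finished in {duration:.1f}s")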
@@ -278,12 +288,18 @@ class CreateSessionRequest(Serializable):
 
 class SessionInfo(JsonSerializable):
     session_id: str = StringField("session_id")
-    settings: Dict[str, Any] = DictField(
-        "settings", key_type=FieldTypes.string
-    )
+    settings: Dict[str, Any] = DictField(
+        "settings", key_type=FieldTypes.string, default=None
+    )
+    start_timestamp: float = Float64Field("start_timestamp", default=None)
+    idle_timestamp: float = Float64Field("idle_timestamp", default=None)
     dag_infos: Dict[str, Optional[DagInfo]] = DictField(
-        "dag_infos",
+        "dag_infos",
+        key_type=FieldTypes.string,
+        value_type=FieldTypes.reference,
+        default=None,
     )
+    error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
 
     @classmethod
     def from_json(cls, serialized: dict) -> "SessionInfo":
@@ -292,6 +308,8 @@ class SessionInfo(JsonSerializable):
             kw["dag_infos"] = {
                 k: DagInfo.from_json(v) for k, v in kw["dag_infos"].items()
             }
+        if kw.get("error_info"):
+            kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
         return SessionInfo(**kw)
 
     def to_json(self) -> dict:
@@ -303,6 +321,8 @@ class SessionInfo(JsonSerializable):
         }
         if self.dag_infos:
            ret["dag_infos"] = {k: v.to_json() for k, v in self.dag_infos.items()}
+        if self.error_info:
+            ret["error_info"] = self.error_info.to_json()
         return ret
 
 

maxframe/serialization/core.cp38-win_amd64.pyd CHANGED
Binary file
maxframe/serialization/exception.py CHANGED
@@ -16,13 +16,14 @@ import logging
 import traceback
 from typing import Dict, List
 
+from ..errors import MaxFrameError
 from ..lib import wrapped_pickle as pickle
 from .core import Serializer, buffered, pickle_buffers, unpickle_buffers
 
 logger = logging.getLogger(__name__)
 
 
-class RemoteException(Exception):
+class RemoteException(MaxFrameError):
     def __init__(
         self, messages: List[str], tracebacks: List[List[str]], buffers: List[bytes]
     ):
maxframe/serialization/serializables/core.py CHANGED
@@ -112,6 +112,7 @@ class Serializable(metaclass=SerializableMeta):
     __slots__ = ("__weakref__",)
 
     _cache_primitive_serial = False
+    _ignore_non_existing_keys = False
 
     _FIELDS: Dict[str, Field]
     _FIELD_ORDER: List[str]
@@ -128,7 +129,11 @@ class Serializable(metaclass=SerializableMeta):
         else:
             values = kwargs
         for k, v in values.items():
-            fields[k].set(self, v)
+            try:
+                fields[k].set(self, v)
+            except KeyError:
+                if not self._ignore_non_existing_keys:
+                    raise
 
     def __on_deserialize__(self):
         pass
maxframe/serialization/serializables/field.py CHANGED
@@ -507,12 +507,14 @@ class ReferenceField(Field):
         tag: str,
         reference_type: Union[str, Type] = None,
         default: Any = no_default,
+        default_factory: Optional[Callable] = None,
         on_serialize: Callable[[Any], Any] = None,
         on_deserialize: Callable[[Any], Any] = None,
     ):
         super().__init__(
             tag,
             default=default,
+            default_factory=default_factory,
             on_serialize=on_serialize,
             on_deserialize=on_deserialize,
         )
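`ReferenceField` now forwards `default_factory` to the base `Field`, the same mutable-default pattern as `dataclasses.field(default_factory=...)`: a fresh default object per instance instead of one shared value. Sketched with a hypothetical serializable (field names are illustrative only):

    from maxframe.protocol import ErrorInfo
    from maxframe.serialization.serializables import ReferenceField, Serializable

    class NodeInfo(Serializable):
        # without a factory, one shared default object would leak state
        # across instances; the factory builds a fresh ErrorInfo each time
        error_info = ReferenceField("error_info", default_factory=ErrorInfo)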
maxframe/tensor/core.py CHANGED
@@ -43,7 +43,7 @@ from ..serialization.serializables import (
     StringField,
     TupleField,
 )
-from ..utils import on_deserialize_shape, on_serialize_shape
+from ..utils import on_deserialize_shape, on_serialize_shape, skip_na_call
 from .utils import fetch_corner_data, get_chunk_slices
 
 logger = logging.getLogger(__name__)
@@ -181,8 +181,8 @@ class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
     _chunks = ListField(
         "chunks",
         FieldTypes.reference(TensorChunkData),
-        on_serialize=lambda x: [it.data for it in x],
-        on_deserialize=lambda x: [TensorChunk(it) for it in x],
+        on_serialize=skip_na_call(lambda x: [it.data for it in x]),
+        on_deserialize=skip_na_call(lambda x: [TensorChunk(it) for it in x]),
     )
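`skip_na_call` comes from maxframe/utils.py, which is also touched in this diff but whose body is not shown here. From its use above it is presumably a wrapper that passes `None` through instead of applying the callback, so the serialize/deserialize hooks survive unset chunk lists. A plausible sketch only, not the actual implementation:

    import functools

    def skip_na_call(func):
        """Presumed shape: return the input unchanged when it is None."""
        @functools.wraps(func)
        def wrapper(value):
            return value if value is None else func(value)
        return wrapper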
maxframe/tests/test_codegen.py ADDED
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+from typing import List, Tuple
+
+# generate unit tests with pytest
+import pytest
+
+from maxframe.codegen import UserCodeMixin
+from maxframe.lib import wrapped_pickle
+from maxframe.serialization.core import PickleContainer
+
+
+@pytest.mark.parametrize(
+    "input_obj, expected_output",
+    [
+        (None, "None"),
+        (10, "10"),
+        (3.14, "3.14"),
+        (True, "True"),
+        (False, "False"),
+        (b"hello", "base64.b64decode(b'aGVsbG8=')"),
+        ("hello", "'hello'"),
+        ([1, 2, 3], "[1, 2, 3]"),
+        ({"a": 1, "b": 2}, "{'a': 1, 'b': 2}"),
+        ((1, 2, 3), "(1, 2, 3)"),
+        ((1,), "(1,)"),
+        ((), "()"),
+        ({1, 2, 3}, "{1, 2, 3}"),
+        (set(), "set()"),
+    ],
+)
+def test_obj_to_python_expr(input_obj, expected_output):
+    assert UserCodeMixin.obj_to_python_expr(input_obj) == expected_output
+
+
+def test_obj_to_python_expr_custom_object():
+    class CustomClass:
+        def __init__(self, a: int, b: List[int], c: Tuple[int, int]):
+            self.a = a
+            self.b = b
+            self.c = c
+
+    custom_obj = CustomClass(1, [2, 3], (4, 5))
+    pickle_data = wrapped_pickle.dumps(custom_obj)
+    pickle_str = base64.b64encode(pickle_data)
+    custom_obj_pickle_container = PickleContainer([pickle_data])
+
+    # class objects are not supported currently
+    with pytest.raises(ValueError):
+        UserCodeMixin.obj_to_python_expr(custom_obj)
+
+    assert (
+        UserCodeMixin.obj_to_python_expr(custom_obj_pickle_container)
+        == f"cloudpickle.loads(base64.b64decode({pickle_str}), buffers=[])"
+    )
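This new test pins down `UserCodeMixin.obj_to_python_expr`: plain literals become their source form, `bytes` become a `base64.b64decode(...)` expression, and pickled payloads become a `cloudpickle.loads(...)` expression, so any supported value can be embedded in generated Python source. Evaluating the generated text recovers the value, for example:

    import base64

    expr = "base64.b64decode(b'aGVsbG8=')"  # as produced for b"hello"
    assert eval(expr) == b"hello"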
maxframe/tests/test_protocol.py CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import json
 import time
 
 import pytest
@@ -29,28 +31,32 @@ from ..serialization import RemoteException
 from ..utils import deserialize_serializable, serialize_serializable
 
 
+def _json_round_trip(json_data: dict) -> dict:
+    return json.loads(json.dumps(json_data))
+
+
 def test_result_info_json_serialize():
-    ri = ResultInfo.from_json(ResultInfo().to_json())
+    ri = ResultInfo.from_json(_json_round_trip(ResultInfo().to_json()))
     assert type(ri) is ResultInfo
 
     ri = ODPSTableResultInfo(
         full_table_name="table_name", partition_specs=["pt=partition"]
     )
-    deserial_ri = ResultInfo.from_json(ri.to_json())
+    deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
     assert type(ri) is ODPSTableResultInfo
     assert ri.result_type == deserial_ri.result_type
     assert ri.full_table_name == deserial_ri.full_table_name
     assert ri.partition_specs == deserial_ri.partition_specs
 
     ri = ODPSTableResultInfo(full_table_name="table_name")
-    deserial_ri = ResultInfo.from_json(ri.to_json())
+    deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
     assert type(ri) is ODPSTableResultInfo
     assert ri.result_type == deserial_ri.result_type
     assert ri.full_table_name == deserial_ri.full_table_name
     assert ri.partition_specs == deserial_ri.partition_specs
 
     ri = ODPSVolumeResultInfo(volume_name="vol_name", volume_path="vol_path")
-    deserial_ri = ResultInfo.from_json(ri.to_json())
+    deserial_ri = ResultInfo.from_json(_json_round_trip(ri.to_json()))
     assert type(ri) is ODPSVolumeResultInfo
     assert ri.result_type == deserial_ri.result_type
     assert ri.volume_name == deserial_ri.volume_name
@@ -63,7 +69,7 @@ def test_error_info_json_serialize():
     except ValueError as ex:
         err_info = ErrorInfo.from_exception(ex)
 
-    deserial_err_info = ErrorInfo.from_json(err_info.to_json())
+    deserial_err_info = ErrorInfo.from_json(_json_round_trip(err_info.to_json()))
     assert deserial_err_info.error_messages == err_info.error_messages
     assert isinstance(deserial_err_info.raw_error_data, ValueError)
 
@@ -73,7 +79,7 @@ def test_error_info_json_serialize():
     with pytest.raises(RemoteException):
         mf_err_info.reraise()
 
-    deserial_err_info = ErrorInfo.from_json(mf_err_info.to_json())
+    deserial_err_info = ErrorInfo.from_json(_json_round_trip(mf_err_info.to_json()))
     assert isinstance(deserial_err_info.raw_error_data, ValueError)
     with pytest.raises(ValueError):
         deserial_err_info.reraise()
@@ -94,7 +100,9 @@ def test_dag_info_json_serialize():
         },
         error_info=err_info,
     )
-    deserial_info = DagInfo.from_json(info.to_json())
+    json_info = info.to_json()
+    json_info["non_existing_field"] = "non_existing"
+    deserial_info = DagInfo.from_json(_json_round_trip(json_info))
     assert deserial_info.session_id == info.session_id
     assert deserial_info.dag_id == info.dag_id
     assert deserial_info.status == info.status
@@ -121,7 +129,7 @@ def test_session_info_json_serialize():
         idle_timestamp=None,
         dag_infos={"test_dag_id": dag_info},
     )
-    deserial_info = SessionInfo.from_json(info.to_json())
+    deserial_info = SessionInfo.from_json(_json_round_trip(info.to_json()))
     assert deserial_info.session_id == info.session_id
     assert deserial_info.settings == info.settings
     assert deserial_info.start_timestamp == info.start_timestamp
maxframe/tests/utils.py CHANGED