maxframe-1.0.0rc3-cp38-cp38-macosx_10_9_universal2.whl → maxframe-1.1.0-cp38-cp38-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/codegen.py +1 -0
- maxframe/config/config.py +16 -1
- maxframe/conftest.py +52 -14
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +26 -2
- maxframe/dataframe/datasource/read_odps_query.py +116 -28
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
- maxframe/dataframe/datastore/to_odps.py +7 -0
- maxframe/dataframe/extensions/__init__.py +8 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +314 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +23 -2
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/arrow.py +30 -2
- maxframe/io/odpsio/schema.py +28 -8
- maxframe/io/odpsio/tableio.py +55 -133
- maxframe/io/odpsio/tests/test_schema.py +40 -4
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +36 -6
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +3 -3
- maxframe/learn/contrib/xgboost/predict.py +8 -39
- maxframe/learn/contrib/xgboost/train.py +4 -3
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +10 -1
- maxframe/protocol.py +6 -1
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +24 -5
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +8 -1
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/concatenate.py +23 -20
- maxframe/tensor/merge/vstack.py +5 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +64 -14
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +28 -10
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/odps.py +104 -20
- maxframe_client/session/task.py +42 -26
- maxframe_client/session/tests/test_task.py +0 -4
- maxframe_client/tests/test_session.py +44 -12
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/llm/multi_modal.py
ADDED
@@ -0,0 +1,42 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict
+
+from ....dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
+from .core import LLM
+
+
+class MultiModalLLM(LLM):
+    def generate(
+        self,
+        data,
+        prompt_template: Dict[str, Any],
+        params: Dict[str, Any] = None,
+    ):
+        raise NotImplementedError
+
+
+def generate(
+    data,
+    model: MultiModalLLM,
+    prompt_template: Dict[str, Any],
+    params: Dict[str, Any] = None,
+):
+    if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
+        raise ValueError("data must be a maxframe dataframe or series object")
+    if not isinstance(model, MultiModalLLM):
+        raise ValueError("model must be a MultiModalLLM object")
+    params = params if params is not None else dict()
+    model.validate_params(params)
+    return model.generate(data, prompt_template, params)
maxframe/learn/contrib/llm/text.py
ADDED
@@ -0,0 +1,42 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict
+
+from ....dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
+from .core import LLM
+
+
+class TextLLM(LLM):
+    def generate(
+        self,
+        data,
+        prompt_template: Dict[str, Any],
+        params: Dict[str, Any] = None,
+    ):
+        raise NotImplementedError
+
+
+def generate(
+    data,
+    model: TextLLM,
+    prompt_template: Dict[str, Any],
+    params: Dict[str, Any] = None,
+):
+    if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
+        raise ValueError("data must be a maxframe dataframe or series object")
+    if not isinstance(model, TextLLM):
+        raise ValueError("model must be a TextLLM object")
+    params = params if params is not None else dict()
+    model.validate_params(params)
+    return model.generate(data, prompt_template, params)
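Both new modules follow the same dispatch shape: a module-level `generate` validates that `data` is a MaxFrame DataFrame or Series and that `model` is the right LLM subclass, normalizes `params`, then delegates to the model. A minimal usage sketch under stated assumptions: `EchoLLM` is a hypothetical stand-in (a real model would come from `models/dashscope.py`), and the base-class constructor is assumed to take no required arguments.

from typing import Any, Dict

import maxframe.dataframe as md
from maxframe.learn.contrib.llm.text import TextLLM, generate


class EchoLLM(TextLLM):
    # hypothetical model: echoes its input instead of calling a service
    def validate_params(self, params: Dict[str, Any]) -> None:
        pass

    def generate(self, data, prompt_template, params):
        return data


df = md.DataFrame({"question": ["What is MaxFrame?"]})
# passes both isinstance guards, then dispatches to EchoLLM.generate
result = generate(df, EchoLLM(), prompt_template={"content": "{question}"})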
maxframe/learn/contrib/xgboost/classifier.py
CHANGED
@@ -14,7 +14,8 @@
 
 import numpy as np
 
-from ....tensor import argmax, transpose
+from ....tensor import argmax, transpose
+from ....tensor.merge.vstack import _vstack
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
 
@@ -89,7 +90,6 @@ else:
            if ntree_limit is not None:
                raise NotImplementedError("ntree_limit is not currently supported")
            prediction = predict(self.get_booster(), data, flag=flag, **kw)
-
            if len(prediction.shape) == 2 and prediction.shape[1] == self.n_classes_:
                # multi-class
                return prediction
@@ -103,7 +103,7 @@ else:
            # binary logistic function
            classone_probs = prediction
            classzero_probs = 1.0 - classone_probs
-            return transpose(
+            return transpose(_vstack((classzero_probs, classone_probs)))
 
        @property
        def classes_(self) -> np.ndarray:
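For a binary objective the booster emits a single vector of positive-class probabilities; stacking it against its complement and transposing produces the `(n_samples, 2)` matrix that `predict_proba` callers expect. The same arithmetic in plain NumPy, standing in for the tensor-level `_vstack`/`transpose` above:

import numpy as np

# 1-D vector of P(class=1) from a binary logistic model
classone_probs = np.array([0.9, 0.2, 0.6])
classzero_probs = 1.0 - classone_probs

# vstack -> (2, n); transpose -> (n, 2), one row per sample
proba = np.transpose(np.vstack((classzero_probs, classone_probs)))
assert proba.shape == (3, 2)
assert np.allclose(proba.sum(axis=1), 1.0)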
maxframe/learn/contrib/xgboost/predict.py
CHANGED
@@ -14,20 +14,18 @@
 
 
 import numpy as np
-import pandas as pd
 
 from .... import opcodes
 from ....core.entity.output_types import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
-from ....dataframe.utils import parse_index
 from ....serialization.serializables import (
     BoolField,
     KeyField,
     ReferenceField,
     TupleField,
 )
-from ....tensor.core import
+from ....tensor.core import TensorOrder
 from .core import BoosterData
 from .dmatrix import check_data
 
@@ -65,35 +63,12 @@ class XGBPredict(Operator, TileableOperatorMixin):
         else:
             shape = (self.data.shape[0],)
         inputs = [self.data, self.model]
-
-
-
-
-
-
-                order=TensorOrder.C_ORDER,
-            )
-        elif self.output_types[0] == OutputType.dataframe:
-            # dataframe
-            dtypes = pd.DataFrame(
-                np.random.rand(0, num_class), dtype=self.output_dtype
-            ).dtypes
-            return self.new_tileable(
-                inputs,
-                shape=shape,
-                dtypes=dtypes,
-                columns_value=parse_index(dtypes.index),
-                index_value=self.data.index_value,
-            )
-        else:
-            # series
-            return self.new_tileable(
-                inputs,
-                shape=shape,
-                index_value=self.data.index_value,
-                name="predictions",
-                dtype=self.output_dtype,
-            )
+        return self.new_tileable(
+            inputs,
+            shape=shape,
+            dtype=self.output_dtype,
+            order=TensorOrder.C_ORDER,
+        )
 
 
 def predict(
@@ -124,13 +99,7 @@ def predict(
     data = check_data(data)
     # TODO: check model datatype
 
-
-    if isinstance(data, TENSOR_TYPE):
-        output_types = [OutputType.tensor]
-    elif num_class is not None:
-        output_types = [OutputType.dataframe]
-    else:
-        output_types = [OutputType.series]
+    output_types = [OutputType.tensor]
 
     iteration_range = iteration_range or (0, 0)
 
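With the dataframe and series branches removed, `XGBPredict` always declares one tensor output: shape `(n, num_class)` for multi-class models, `(n,)` otherwise, in C order. A small NumPy sketch of just those resulting shapes (illustration only, not the MaxFrame API):

import numpy as np

n_samples, num_class = 4, 3
multi = np.zeros((n_samples, num_class), order="C")  # multi-class output
flat = np.zeros((n_samples,), order="C")             # binary/regression output
assert multi.shape == (4, 3) and flat.shape == (4,)
assert multi.flags["C_CONTIGUOUS"]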
maxframe/learn/contrib/xgboost/train.py
CHANGED
@@ -102,7 +102,7 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
     Parameters
     ----------
     Parameters are the same as `xgboost.train`. Note that train is an eager-execution
-    API
+    API if evals is passed, thus the call will be blocked until training finished.
 
     Returns
     -------
@@ -121,11 +121,12 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
             processed_evals.append((eval_dmatrix, name))
         else:
             processed_evals.append((to_dmatrix(eval_dmatrix), name))
-
+    data = XGBTrain(
         params=params,
         dtrain=dtrain,
         evals=processed_evals,
         evals_result=evals_result,
         num_class=num_class,
         **kwargs,
-    )(evals_result)
+    )(evals_result)
+    return data.execute(session=session, **run_kwargs) if evals else data
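The new last line makes the execution policy explicit: the result is executed eagerly only when `evals` is supplied, otherwise the lazy tileable is returned. A self-contained sketch of that return pattern; `LazyResult` and `train_like` are illustrative names, not MaxFrame API:

class LazyResult:
    # stand-in for the tileable produced by XGBTrain
    def execute(self, **run_kwargs):
        print("blocking until training finishes")
        return self


def train_like(data, evals=None, **run_kwargs):
    # mirror of the diff: eager only when evals is provided
    return data.execute(**run_kwargs) if evals else data


deferred = train_like(LazyResult())                    # stays lazy
finished = train_like(LazyResult(), evals=[object()])  # executes now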
maxframe/lib/mmh3.cpython-38-darwin.so
CHANGED
Binary file
maxframe/lib/sparse/tests/test_sparse.py
CHANGED
@@ -55,13 +55,13 @@ def test_sparse_creation():
     s = SparseNDArray(s1_data)
     assert s.ndim == 2
     assert isinstance(s, SparseMatrix)
-    assert_array_equal(s.toarray(), s1_data.
-    assert_array_equal(s.todense(), s1_data.
+    assert_array_equal(s.toarray(), s1_data.toarray())
+    assert_array_equal(s.todense(), s1_data.toarray())
 
     ss = pickle.loads(pickle.dumps(s))
     assert s == ss
-    assert_array_equal(ss.toarray(), s1_data.
-    assert_array_equal(ss.todense(), s1_data.
+    assert_array_equal(ss.toarray(), s1_data.toarray())
+    assert_array_equal(ss.todense(), s1_data.toarray())
 
     v = SparseNDArray(v1, shape=(3,))
     assert s.ndim
@@ -331,12 +331,12 @@ def test_sparse_dot():
 
     assert_array_equal(mls.dot(s1, v1_s), s1.dot(v1_data))
     assert_array_equal(mls.dot(s2, v1_s), s2.dot(v1_data))
-    assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.
-    assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.
+    assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.toarray()))
+    assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.toarray()))
     assert_array_equal(mls.dot(v1_s, v1_s), v1_data.dot(v1_data), almost=True)
     assert_array_equal(mls.dot(v2_s, v2_s), v2_data.dot(v2_data), almost=True)
 
-    assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.
+    assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.toarray()))
     assert_array_equal(mls.dot(v1_s, v1_s, sparse=False), v1_data.dot(v1_data))
 
 
@@ -390,7 +390,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(3)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, 3)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -399,7 +399,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(3, wrap=True)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, 3, wrap=True)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -408,7 +408,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal([1, 2, 3])
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, [1, 2, 3])
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -417,7 +417,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal([1, 2, 3], wrap=True)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, [1, 2, 3], wrap=True)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -427,7 +427,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -437,7 +437,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val, wrap=True)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val, wrap=True)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -447,7 +447,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -457,7 +457,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val, wrap=True)
 
-    expected = s1.copy().
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val, wrap=True)
 
     np.testing.assert_array_equal(arr.toarray(), expected)
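Every hunk in this file replaces a truncated accessor on the underlying sparse data with an explicit `toarray()` call. In scipy, `toarray()` returns a plain `numpy.ndarray` while `todense()` returns the legacy `numpy.matrix`, so normalizing the expected values on `toarray()` keeps the comparisons ndarray-typed; a standalone illustration, assuming scipy-backed matrices as `SparseMatrix` suggests:

import numpy as np
import scipy.sparse as sps

m = sps.csr_matrix(np.eye(3))
assert isinstance(m.toarray(), np.ndarray)  # plain ndarray
assert isinstance(m.todense(), np.matrix)   # legacy matrix type
np.testing.assert_array_equal(m.toarray(), np.asarray(m.todense()))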
maxframe/opcodes.py
CHANGED
@@ -270,6 +270,7 @@ KURTOSIS = 351
 SEM = 352
 STR_CONCAT = 353
 MAD = 354
+MEDIAN = 355
 
 # tensor operator
 RESHAPE = 401
@@ -377,7 +378,6 @@ DROP_DUPLICATES = 728
 MELT = 729
 RENAME = 731
 INSERT = 732
-MAP_CHUNK = 733
 CARTESIAN_CHUNK = 734
 EXPLODE = 735
 REPLACE = 736
@@ -392,6 +392,10 @@ PIVOT_TABLE = 744
 
 FUSE = 801
 
+# LLM
+DASHSCOPE_TEXT_GENERATION = 810
+DASHSCOPE_MULTI_MODAL_GENERATION = 811
+
 # table like input for tensor
 TABLE_COO = 1003
 # store tensor as coo format
@@ -532,6 +536,8 @@ STATSMODELS_TRAIN = 3012
 STATSMODELS_PREDICT = 3013
 
 # learn
+CONNECTED_COMPONENTS = 3100
+
 # checks
 CHECK_NON_NEGATIVE = 3300
 # classifier check targets
@@ -566,6 +572,9 @@ CHOLESKY_FUSE = 999988
 
 # MaxFrame-dedicated functions
 DATAFRAME_RESHUFFLE = 10001
+FLATMAP = 10002
+FLATJSON = 10003
+APPLY_CHUNK = 10004
 
 # MaxFrame internal operators
 DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
maxframe/protocol.py
CHANGED
@@ -375,6 +375,11 @@ class ExecuteDagRequest(Serializable):
         value_type=FieldTypes.reference,
         default=None,
     )
+    new_settings: Dict[str, Any] = DictField(
+        "new_settings",
+        key_type=FieldTypes.string,
+        default=None,
+    )
 
 
 class SubDagSubmitInstanceInfo(JsonSerializable):
@@ -511,7 +516,7 @@ class DataFrameTableMeta(JsonSerializable):
         return True
 
     def to_json(self) -> dict:
-        b64_pk = lambda x: base64.b64encode(pickle.dumps(x))
+        b64_pk = lambda x: base64.b64encode(pickle.dumps(x)).decode()
         ret = {
             "table_name": self.table_name,
             "type": self.type.value,
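The `to_json` change fixes a type mismatch: `base64.b64encode` returns `bytes`, which the standard `json` encoder rejects, so the helper now decodes to `str` before the value is placed in the JSON payload. A minimal reproduction:

import base64
import json
import pickle

raw = base64.b64encode(pickle.dumps({"x": 1}))
try:
    json.dumps({"meta": raw})  # TypeError: bytes are not JSON serializable
except TypeError as exc:
    print(exc)

json.dumps({"meta": raw.decode()})  # base64 output is ASCII-safe as str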
maxframe/serialization/core.cpython-38-darwin.so
CHANGED
Binary file
maxframe/serialization/core.pyx
CHANGED
@@ -37,7 +37,7 @@ from .._utils import NamedType
 from .._utils cimport TypeDispatcher
 
 from ..lib import wrapped_pickle as pickle
-from ..utils import arrow_type_from_str
+from ..utils import NoDefault, arrow_type_from_str, no_default
 
 try:
     from pandas import ArrowDtype
@@ -94,6 +94,7 @@ cdef:
     int COMPLEX_SERIALIZER = 12
     int SLICE_SERIALIZER = 13
     int REGEX_SERIALIZER = 14
+    int NO_DEFAULT_SERIALIZER = 15
     int PLACEHOLDER_SERIALIZER = 4096
 
 
@@ -803,6 +804,16 @@ cdef class RegexSerializer(Serializer):
         return re.compile((<bytes>(subs[0])).decode(), serialized[0])
 
 
+cdef class NoDefaultSerializer(Serializer):
+    serializer_id = NO_DEFAULT_SERIALIZER
+
+    cpdef serial(self, object obj, dict context):
+        return [], [], True
+
+    cpdef deserial(self, list obj, dict context, list subs):
+        return no_default
+
+
 cdef class Placeholder:
     """
     Placeholder object to reduce duplicated serialization
@@ -857,6 +868,7 @@ DtypeSerializer.register(ExtensionDtype)
 ComplexSerializer.register(complex)
 SliceSerializer.register(slice)
 RegexSerializer.register(re.Pattern)
+NoDefaultSerializer.register(NoDefault)
 PlaceholderSerializer.register(Placeholder)
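Because `no_default` is a process-wide singleton, the serializer carries no payload: `serial` emits empty frames and `deserial` returns the module-level instance, so identity checks (`value is no_default`) survive a round trip. The same pattern in plain Python (a sketch, not the Cython implementation above):

class _NoDefault:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance


no_default = _NoDefault()


def serial(obj):
    return []  # nothing to encode; the type alone identifies the value


def deserial(payload):
    return no_default  # always the same singleton


assert deserial(serial(no_default)) is no_default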
maxframe/serialization/pandas.py
CHANGED
@@ -134,8 +134,10 @@ class ArraySerializer(Serializer):
             data_parts = [obj.tolist()]
         else:
             data_parts = [obj.to_numpy().tolist()]
-
+        elif hasattr(obj, "_data"):
             data_parts = [getattr(obj, "_data")]
+        else:
+            data_parts = [getattr(obj, "_pa_array")]
         return [ser_type], [dtype] + data_parts, False
 
     def deserial(self, serialized: List, context: Dict, subs: List):
@@ -155,38 +157,66 @@ class PdTimestampSerializer(Serializer):
         else:
             zone_info = []
         ts = obj.to_pydatetime().timestamp()
-
-
-
-
-        )
+        elements = [int(ts), obj.microsecond, obj.nanosecond]
+        if hasattr(obj, "unit"):
+            elements.append(str(obj.unit))
+        return elements, zone_info, bool(zone_info)
 
     def deserial(self, serialized: List, context: Dict, subs: List):
         if subs:
-
-
-
-
+            pydt = datetime.datetime.utcfromtimestamp(serialized[0])
+            kwargs = {
+                "year": pydt.year,
+                "month": pydt.month,
+                "day": pydt.day,
+                "hour": pydt.hour,
+                "minute": pydt.minute,
+                "second": pydt.second,
+                "microsecond": serialized[1],
+                "nanosecond": serialized[2],
+                "tzinfo": datetime.timezone.utc,
+            }
+            if len(serialized) > 3:
+                kwargs["unit"] = serialized[3]
+            val = pd.Timestamp(**kwargs).tz_convert(subs[0])
         else:
-
-
-
+            pydt = datetime.datetime.fromtimestamp(serialized[0])
+            kwargs = {
+                "year": pydt.year,
+                "month": pydt.month,
+                "day": pydt.day,
+                "hour": pydt.hour,
+                "minute": pydt.minute,
+                "second": pydt.second,
+                "microsecond": serialized[1],
+                "nanosecond": serialized[2],
+            }
+            if len(serialized) >= 4:
+                kwargs["unit"] = serialized[3]
+            val = pd.Timestamp(**kwargs)
         return val
 
 
 class PdTimedeltaSerializer(Serializer):
     def serial(self, obj: pd.Timedelta, context: Dict):
-
+        elements = [int(obj.seconds), obj.microseconds, obj.nanoseconds, obj.days]
+        if hasattr(obj, "unit"):
+            elements.append(str(obj.unit))
+        return elements, [], True
 
     def deserial(self, serialized: List, context: Dict, subs: List):
         days = 0 if len(serialized) < 4 else serialized[3]
+        unit = None if len(serialized) < 5 else serialized[4]
         seconds, microseconds, nanoseconds = serialized[:3]
-
-        days
-        seconds
-        microseconds
-        nanoseconds
-
+        kwargs = {
+            "days": days,
+            "seconds": seconds,
+            "microseconds": microseconds,
+            "nanoseconds": nanoseconds,
+        }
+        if unit is not None:
+            kwargs["unit"] = unit
+        return pd.Timedelta(**kwargs)
 
 
 class NoDefaultSerializer(Serializer):
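The timedelta codec now ships explicit components (plus an optional unit on newer pandas) instead of the truncated constructor call it replaced. Rebuilding from those components is lossless down to nanoseconds, as this standalone pandas check shows:

import pandas as pd

td = pd.Timedelta(days=1, hours=2, microseconds=3, nanoseconds=4)

# same decomposition as PdTimedeltaSerializer.serial
parts = [int(td.seconds), td.microseconds, td.nanoseconds, td.days]

rebuilt = pd.Timedelta(
    days=parts[3],
    seconds=parts[0],
    microseconds=parts[1],
    nanoseconds=parts[2],
)
assert rebuilt == td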
maxframe/serialization/serializables/core.py
CHANGED
@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type
 import msgpack
 
 from ...lib.mmh3 import hash
+from ...utils import no_default
 from ..core import Placeholder, Serializer, buffered, load_type
 from .field import Field
 from .field_type import DictType, ListType, PrimitiveFieldType, TupleType
@@ -211,6 +212,22 @@ class _NoFieldValue:
 _no_field_value = _NoFieldValue()
 
 
+def _to_primitive_placeholder(v: Any) -> Any:
+    if v is _no_field_value or v is no_default:
+        return {}
+    return v
+
+
+def _restore_primitive_placeholder(v: Any) -> Any:
+    if type(v) is dict:
+        if v == {}:
+            return _no_field_value
+        else:
+            return v
+    else:
+        return v
+
+
 class SerializableSerializer(Serializer):
     """
     Leverage DictSerializer to perform serde.
@@ -241,9 +258,7 @@ class SerializableSerializer(Serializer):
         else:
             primitive_vals = self._get_field_values(obj, obj._PRIMITIVE_FIELDS)
             # replace _no_field_value as {} to make them msgpack-serializable
-            primitive_vals = [
-                v if v is not _no_field_value else {} for v in primitive_vals
-            ]
+            primitive_vals = [_to_primitive_placeholder(v) for v in primitive_vals]
         if obj._cache_primitive_serial:
             primitive_vals = msgpack.dumps(primitive_vals)
             _primitive_serial_cache[obj] = primitive_vals
@@ -311,7 +326,9 @@ class SerializableSerializer(Serializer):
             cls_fields = server_fields[server_field_num : field_num + count]
             cls_values = values[field_num : field_num + count]
             for field, value in zip(cls_fields, cls_values):
-                if
+                if is_primitive:
+                    value = _restore_primitive_placeholder(value)
+                if not is_primitive or value is not _no_field_value:
                     cls._set_field_value(obj, field, value)
             field_num += count
         try:
@@ -356,7 +373,9 @@ class SerializableSerializer(Serializer):
             server_fields + deprecated_fields, key=lambda f: f.name
         )
         for field, value in zip(server_fields, values):
-            if
+            if is_primitive:
+                value = _restore_primitive_placeholder(value)
+            if not is_primitive or value is not _no_field_value:
                 try:
                     cls._set_field_value(obj, field, value)
                 except AttributeError:  # pragma: no cover
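The `{}` placeholder exists because msgpack encodes only plain data, not arbitrary Python sentinels; both `_no_field_value` and `no_default` therefore collapse to an empty dict on the wire and are mapped back on deserialization. A standalone demonstration of the constraint and the workaround:

import msgpack

sentinel = object()
values = [1, sentinel, "a"]

try:
    msgpack.dumps(values)  # sentinels are not msgpack-encodable
except TypeError as exc:
    print(exc)

packed = msgpack.dumps([{} if v is sentinel else v for v in values])
restored = [sentinel if v == {} else v for v in msgpack.loads(packed)]
assert restored[1] is sentinel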
maxframe/serialization/serializables/field_type.py
CHANGED
@@ -46,6 +46,9 @@ class PrimitiveType(Enum):
     complex128 = 25
 
 
+_np_unicode = np.unicode_ if hasattr(np, "unicode_") else np.str_
+
+
 _primitive_type_to_valid_types = {
     PrimitiveType.bool: (bool, np.bool_),
     PrimitiveType.int8: (int, np.int8),
@@ -60,7 +63,7 @@ _primitive_type_to_valid_types = {
     PrimitiveType.float32: (float, np.float32),
     PrimitiveType.float64: (float, np.float64),
     PrimitiveType.bytes: (bytes, np.bytes_),
-    PrimitiveType.string: (str,
+    PrimitiveType.string: (str, _np_unicode),
     PrimitiveType.complex64: (complex, np.complex64),
     PrimitiveType.complex128: (complex, np.complex128),
 }
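`_np_unicode` papers over a NumPy rename: `np.unicode_` was an alias of `np.str_` that NumPy 2.0 removed, so the `hasattr` probe keeps the string field type valid on both old and new NumPy. Either way the resolved type is the NumPy unicode scalar, a `str` subclass:

import numpy as np

np_unicode = np.unicode_ if hasattr(np, "unicode_") else np.str_

assert issubclass(np_unicode, str)
assert isinstance(np_unicode("abc"), str)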
maxframe/serialization/serializables/tests/test_serializable.py
CHANGED
@@ -21,6 +21,7 @@ import pytest
 
 from ....core import EntityData
 from ....lib.wrapped_pickle import switch_unpickle
+from ....utils import no_default
 from ... import deserialize, serialize
 from .. import (
     AnyField,
@@ -143,6 +144,7 @@ class MySerializable(Serializable):
         oneof1_val=f"{__name__}.MySerializable",
         oneof2_val=MySimpleSerializable,
     )
+    _no_default_val = Float64Field("no_default_val", default=no_default)
 
 
 @pytest.mark.parametrize("set_is_ci", [False, True], indirect=True)
@@ -187,6 +189,7 @@ def test_serializable(set_is_ci):
         _dict_val={"a": b"bytes_value"},
         _ref_val=MySerializable(),
         _oneof_val=MySerializable(_id="2"),
+        _no_default_val=no_default,
     )
 
     header, buffers = serialize(my_serializable)
@@ -234,7 +237,11 @@ def _assert_serializable_eq(my_serializable, my_serializable2):
         if not hasattr(my_serializable, field.name):
             continue
         expect_value = getattr(my_serializable, field_name)
-
+        if expect_value is no_default:
+            assert not hasattr(my_serializable2, field.name)
+            continue
+        else:
+            actual_value = getattr(my_serializable2, field_name)
         if isinstance(expect_value, np.ndarray):
             np.testing.assert_array_equal(expect_value, actual_value)
         elif isinstance(expect_value, pd.DataFrame):
maxframe/serialization/tests/test_serial.py
CHANGED
@@ -42,7 +42,7 @@ except ImportError:
 from ...lib.sparse import SparseMatrix
 from ...lib.wrapped_pickle import switch_unpickle
 from ...tests.utils import require_cudf, require_cupy
-from ...utils import lazy_import
+from ...utils import lazy_import, no_default
 from .. import (
     PickleContainer,
     RemoteException,
@@ -90,6 +90,7 @@ class CustomNamedTuple(NamedTuple):
         pd.Timedelta(102.234154131),
         {"abc": 5.6, "def": [3.4], "gh": None, "ijk": {}},
         OrderedDict([("abcd", 5.6)]),
+        no_default,
     ],
 )
 @switch_unpickle