PyPI - maxframe - Versions diffs - 0.1.0b5__cp37-cp37m-win_amd64.whl → 1.0.0__cp37-cp37m-win_amd64.whl - Mend

maxframe 0.1.0b5__cp37-cp37m-win_amd64.whl → 1.0.0__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (203)

maxframe/_utils.cp37-win_amd64.pyd +0 -0
maxframe/codegen.py +10 -4
maxframe/config/config.py +68 -10
maxframe/config/validators.py +42 -11
maxframe/conftest.py +58 -14
maxframe/core/__init__.py +2 -16
maxframe/core/entity/__init__.py +1 -12
maxframe/core/entity/executable.py +1 -1
maxframe/core/entity/objects.py +46 -45
maxframe/core/entity/output_types.py +0 -3
maxframe/core/entity/tests/test_objects.py +43 -0
maxframe/core/entity/tileables.py +5 -78
maxframe/core/graph/__init__.py +2 -2
maxframe/core/graph/builder/__init__.py +0 -1
maxframe/core/graph/builder/base.py +5 -4
maxframe/core/graph/builder/tileable.py +4 -4
maxframe/core/graph/builder/utils.py +4 -8
maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
maxframe/core/graph/core.pyx +4 -4
maxframe/core/graph/entity.py +9 -33
maxframe/core/operator/__init__.py +2 -9
maxframe/core/operator/base.py +3 -5
maxframe/core/operator/objects.py +0 -9
maxframe/core/operator/utils.py +55 -0
maxframe/dataframe/__init__.py +1 -1
maxframe/dataframe/arithmetic/around.py +5 -17
maxframe/dataframe/arithmetic/core.py +15 -7
maxframe/dataframe/arithmetic/docstring.py +7 -33
maxframe/dataframe/arithmetic/equal.py +4 -2
maxframe/dataframe/arithmetic/greater.py +4 -2
maxframe/dataframe/arithmetic/greater_equal.py +4 -2
maxframe/dataframe/arithmetic/less.py +2 -2
maxframe/dataframe/arithmetic/less_equal.py +4 -2
maxframe/dataframe/arithmetic/not_equal.py +4 -2
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
maxframe/dataframe/core.py +31 -7
maxframe/dataframe/datasource/date_range.py +2 -2
maxframe/dataframe/datasource/read_odps_query.py +117 -23
maxframe/dataframe/datasource/read_odps_table.py +6 -3
maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
maxframe/dataframe/datastore/to_odps.py +28 -0
maxframe/dataframe/extensions/__init__.py +5 -0
maxframe/dataframe/extensions/flatjson.py +131 -0
maxframe/dataframe/extensions/flatmap.py +317 -0
maxframe/dataframe/extensions/reshuffle.py +1 -1
maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
maxframe/dataframe/groupby/core.py +1 -1
maxframe/dataframe/groupby/cum.py +0 -1
maxframe/dataframe/groupby/fill.py +4 -1
maxframe/dataframe/groupby/getitem.py +6 -0
maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
maxframe/dataframe/groupby/transform.py +5 -1
maxframe/dataframe/indexing/align.py +1 -1
maxframe/dataframe/indexing/loc.py +6 -4
maxframe/dataframe/indexing/rename.py +5 -28
maxframe/dataframe/indexing/sample.py +0 -1
maxframe/dataframe/indexing/set_index.py +68 -1
maxframe/dataframe/initializer.py +11 -1
maxframe/dataframe/merge/__init__.py +9 -1
maxframe/dataframe/merge/concat.py +41 -31
maxframe/dataframe/merge/merge.py +237 -3
maxframe/dataframe/merge/tests/test_merge.py +126 -1
maxframe/dataframe/misc/apply.py +5 -10
maxframe/dataframe/misc/case_when.py +1 -1
maxframe/dataframe/misc/describe.py +2 -2
maxframe/dataframe/misc/drop_duplicates.py +8 -8
maxframe/dataframe/misc/eval.py +4 -0
maxframe/dataframe/misc/memory_usage.py +2 -2
maxframe/dataframe/misc/pct_change.py +1 -83
maxframe/dataframe/misc/tests/test_misc.py +33 -2
maxframe/dataframe/misc/transform.py +1 -30
maxframe/dataframe/misc/value_counts.py +4 -17
maxframe/dataframe/missing/dropna.py +1 -1
maxframe/dataframe/missing/fillna.py +5 -5
maxframe/dataframe/operators.py +1 -17
maxframe/dataframe/reduction/core.py +2 -2
maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
maxframe/dataframe/sort/sort_values.py +1 -11
maxframe/dataframe/statistics/corr.py +3 -3
maxframe/dataframe/statistics/quantile.py +13 -19
maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
maxframe/dataframe/tests/test_initializer.py +33 -2
maxframe/dataframe/utils.py +26 -11
maxframe/dataframe/window/expanding.py +5 -3
maxframe/dataframe/window/tests/test_expanding.py +2 -2
maxframe/errors.py +13 -0
maxframe/extension.py +12 -0
maxframe/io/__init__.py +13 -0
maxframe/io/objects/__init__.py +24 -0
maxframe/io/objects/core.py +140 -0
maxframe/io/objects/tensor.py +76 -0
maxframe/io/objects/tests/__init__.py +13 -0
maxframe/io/objects/tests/test_object_io.py +97 -0
maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
maxframe/{odpsio → io/odpsio}/schema.py +38 -16
maxframe/io/odpsio/tableio.py +719 -0
maxframe/io/odpsio/tests/__init__.py +13 -0
maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
maxframe/io/odpsio/volumeio.py +63 -0
maxframe/learn/contrib/__init__.py +3 -1
maxframe/learn/contrib/graph/__init__.py +15 -0
maxframe/learn/contrib/graph/connected_components.py +215 -0
maxframe/learn/contrib/graph/tests/__init__.py +13 -0
maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
maxframe/learn/contrib/llm/__init__.py +16 -0
maxframe/learn/contrib/llm/core.py +54 -0
maxframe/learn/contrib/llm/models/__init__.py +14 -0
maxframe/learn/contrib/llm/models/dashscope.py +73 -0
maxframe/learn/contrib/llm/multi_modal.py +42 -0
maxframe/learn/contrib/llm/text.py +42 -0
maxframe/learn/contrib/xgboost/classifier.py +26 -2
maxframe/learn/contrib/xgboost/core.py +87 -2
maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
maxframe/learn/contrib/xgboost/predict.py +29 -46
maxframe/learn/contrib/xgboost/regressor.py +3 -10
maxframe/learn/contrib/xgboost/train.py +29 -18
maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
maxframe/lib/mmh3.pyi +43 -0
maxframe/lib/sparse/tests/test_sparse.py +15 -15
maxframe/lib/wrapped_pickle.py +2 -1
maxframe/opcodes.py +8 -0
maxframe/protocol.py +154 -27
maxframe/remote/core.py +4 -8
maxframe/serialization/__init__.py +1 -0
maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -0
maxframe/serialization/core.pyx +67 -26
maxframe/serialization/exception.py +1 -1
maxframe/serialization/pandas.py +52 -17
maxframe/serialization/serializables/core.py +180 -15
maxframe/serialization/serializables/field_type.py +4 -1
maxframe/serialization/serializables/tests/test_serializable.py +54 -5
maxframe/serialization/tests/test_serial.py +2 -1
maxframe/session.py +9 -2
maxframe/tensor/__init__.py +81 -2
maxframe/tensor/arithmetic/isclose.py +1 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
maxframe/tensor/core.py +5 -136
maxframe/tensor/datasource/array.py +3 -0
maxframe/tensor/datasource/full.py +1 -1
maxframe/tensor/datasource/tests/test_datasource.py +1 -1
maxframe/tensor/indexing/flatnonzero.py +1 -1
maxframe/tensor/indexing/getitem.py +2 -0
maxframe/tensor/merge/__init__.py +2 -0
maxframe/tensor/merge/concatenate.py +101 -0
maxframe/tensor/merge/tests/test_merge.py +30 -1
maxframe/tensor/merge/vstack.py +74 -0
maxframe/tensor/{base → misc}/__init__.py +2 -0
maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
maxframe/tensor/misc/atleast_2d.py +70 -0
maxframe/tensor/misc/atleast_3d.py +85 -0
maxframe/tensor/misc/tests/__init__.py +13 -0
maxframe/tensor/{base → misc}/transpose.py +22 -18
maxframe/tensor/{base → misc}/unique.py +3 -3
maxframe/tensor/operators.py +1 -7
maxframe/tensor/random/core.py +1 -1
maxframe/tensor/reduction/count_nonzero.py +2 -1
maxframe/tensor/reduction/mean.py +1 -0
maxframe/tensor/reduction/nanmean.py +1 -0
maxframe/tensor/reduction/nanvar.py +2 -0
maxframe/tensor/reduction/tests/test_reduction.py +12 -1
maxframe/tensor/reduction/var.py +2 -0
maxframe/tensor/statistics/quantile.py +2 -2
maxframe/tensor/utils.py +2 -22
maxframe/tests/test_protocol.py +34 -0
maxframe/tests/test_utils.py +0 -12
maxframe/tests/utils.py +17 -2
maxframe/typing_.py +4 -1
maxframe/udf.py +8 -9
maxframe/utils.py +106 -86
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +3 -3
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
maxframe_client/__init__.py +0 -1
maxframe_client/clients/framedriver.py +4 -1
maxframe_client/fetcher.py +81 -74
maxframe_client/session/consts.py +3 -0
maxframe_client/session/graph.py +8 -2
maxframe_client/session/odps.py +194 -40
maxframe_client/session/task.py +94 -39
maxframe_client/tests/test_fetcher.py +21 -3
maxframe_client/tests/test_session.py +109 -8
maxframe/core/entity/chunks.py +0 -68
maxframe/core/entity/fuse.py +0 -73
maxframe/core/graph/builder/chunk.py +0 -430
maxframe/odpsio/tableio.py +0 -322
maxframe/odpsio/volumeio.py +0 -95
maxframe_client/clients/spe.py +0 -104
/maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
/maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
/maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
/maxframe/tensor/{base → misc}/astype.py +0 -0
/maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
/maxframe/tensor/{base → misc}/ravel.py +0 -0
/maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
/maxframe/tensor/{base → misc}/where.py +0 -0
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +0 -0
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0

maxframe/learn/contrib/xgboost/train.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import logging
 from collections import OrderedDict
-from .... import opcodes as OperandDef
+from .... import opcodes
 from ....core import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
@@ -29,6 +29,7 @@ from ....serialization.serializables import (
     KeyField,
     ListField,
 )
+from .core import Booster
 from .dmatrix import ToDMatrix, to_dmatrix
 logger = logging.getLogger(__name__)
@@ -41,7 +42,7 @@ def _on_serialize_evals(evals_val):
 class XGBTrain(Operator, TileableOperatorMixin):
-    _op_type_ = OperandDef.XGBOOST_TRAIN
+    _op_type_ = opcodes.XGBOOST_TRAIN
     params = DictField("params", key_type=FieldTypes.string, default=None)
     dtrain = KeyField("dtrain", default=None)
@@ -59,49 +60,59 @@ class XGBTrain(Operator, TileableOperatorMixin):
     num_boost_round = Int64Field("num_boost_round", default=10)
     num_class = Int64Field("num_class", default=None)
-    # Store evals_result in local to store the remote evals_result
-    evals_result: dict = None
     def __init__(self, gpu=None, **kw):
         super().__init__(gpu=gpu, **kw)
         if self.output_types is None:
             self.output_types = [OutputType.object]
+        if self.has_evals_result:
+            self.output_types.append(OutputType.object)
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
         self.dtrain = self._inputs[0]
         rest = self._inputs[1:]
-        if self.evals is not None:
+        if self.has_evals_result:
             evals_dict = OrderedDict(self.evals)
             new_evals_dict = OrderedDict()
             for new_key, val in zip(rest, evals_dict.values()):
                 new_evals_dict[new_key] = val
             self.evals = list(new_evals_dict.items())
-    def __call__(self):
+    def __call__(self, evals_result):
         inputs = [self.dtrain]
-        if self.evals is not None:
+        if self.has_evals_result:
             inputs.extend(e[0] for e in self.evals)
-        return self.new_tileable(inputs)
+        return self.new_tileables(
+            inputs, object_class=Booster, evals_result=evals_result
+        )[0]
+    @property
+    def output_limit(self):
+        return 2 if self.has_evals_result else 1
+    @property
+    def has_evals_result(self) -> bool:
+        return self.evals
 def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
     """
-    Train XGBoost model in Mars manner.
+    Train XGBoost model in MaxFrame manner.
     Parameters
     ----------
-    Parameters are the same as `xgboost.train`.
+    Parameters are the same as `xgboost.train`. Note that train is an eager-execution
+    API if evals is passed, thus the call will be blocked until training finished.
     Returns
     -------
     results: Booster
     """
-    evals_result = evals_result or dict()
-    evals = None or ()
+    evals_result = evals_result if evals_result is not None else dict()
     processed_evals = []
+    session = kwargs.pop("session", None)
+    run_kwargs = kwargs.pop("run_kwargs", dict())
     if evals:
         for eval_dmatrix, name in evals:
             if not isinstance(name, str):
@@ -110,12 +121,12 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
                 processed_evals.append((eval_dmatrix, name))
             else:
                 processed_evals.append((to_dmatrix(eval_dmatrix), name))
-    return XGBTrain(
+    data = XGBTrain(
         params=params,
         dtrain=dtrain,
         evals=processed_evals,
         evals_result=evals_result,
         num_class=num_class,
-        **kwargs
-    )()
+        **kwargs,
+    )(evals_result)
+    return data.execute(session=session, **run_kwargs) if evals else data

maxframe/{core/operator/fuse.py → learn/core.py} RENAMED Viewed

@@ -12,18 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ... import opcodes
-from ...serialization.serializables import ReferenceField
-from ..graph import ChunkGraph
-from .base import Operator
+from ..core.entity.objects import Object, ObjectData
-class Fuse(Operator):
-    __slots__ = ("_fuse_graph",)
-    _op_type_ = opcodes.FUSE
+class ModelData(ObjectData):
+    pass
-    fuse_graph = ReferenceField("fuse_graph", ChunkGraph)
+class Model(Object):
+    pass
-class FuseChunkMixin:
-    __slots__ = ()
+MODEL_TYPE = (Model, ModelData)

maxframe/lib/mmh3.cp37-win_amd64.pyd CHANGED Viewed

Binary file

maxframe/lib/mmh3.pyi ADDED Viewed

@@ -0,0 +1,43 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Tuple
+def hash(key, seed=0, signed=True) -> int:
+    """
+    Return a 32 bit integer.
+    """
+def hash_from_buffer(key, seed=0, signed=True) -> int:
+    """
+    Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
+    """
+def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
+    """
+    Return a tuple of two 64 bit integers for a string. Optimized for
+    the x64 bit architecture when x64arch=True, otherwise for x86.
+    """
+def hash128(key, seed=0, x64arch=True, signed=False) -> int:
+    """
+    Return a 128 bit long integer. Optimized for the x64 bit architecture
+    when x64arch=True, otherwise for x86.
+    """
+def hash_bytes(key, seed=0, x64arch=True) -> bytes:
+    """
+    Return a 128 bit hash value as bytes for a string. Optimized for the
+    x64 bit architecture when x64arch=True, otherwise for the x86.
+    """

maxframe/lib/sparse/tests/test_sparse.py CHANGED Viewed

@@ -55,13 +55,13 @@ def test_sparse_creation():
     s = SparseNDArray(s1_data)
     assert s.ndim == 2
     assert isinstance(s, SparseMatrix)
-    assert_array_equal(s.toarray(), s1_data.A)
-    assert_array_equal(s.todense(), s1_data.A)
+    assert_array_equal(s.toarray(), s1_data.toarray())
+    assert_array_equal(s.todense(), s1_data.toarray())
     ss = pickle.loads(pickle.dumps(s))
     assert s == ss
-    assert_array_equal(ss.toarray(), s1_data.A)
-    assert_array_equal(ss.todense(), s1_data.A)
+    assert_array_equal(ss.toarray(), s1_data.toarray())
+    assert_array_equal(ss.todense(), s1_data.toarray())
     v = SparseNDArray(v1, shape=(3,))
     assert s.ndim
@@ -331,12 +331,12 @@ def test_sparse_dot():
     assert_array_equal(mls.dot(s1, v1_s), s1.dot(v1_data))
     assert_array_equal(mls.dot(s2, v1_s), s2.dot(v1_data))
-    assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.A))
-    assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.A))
+    assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.toarray()))
+    assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.toarray()))
     assert_array_equal(mls.dot(v1_s, v1_s), v1_data.dot(v1_data), almost=True)
     assert_array_equal(mls.dot(v2_s, v2_s), v2_data.dot(v2_data), almost=True)
-    assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.A))
+    assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.toarray()))
     assert_array_equal(mls.dot(v1_s, v1_s, sparse=False), v1_data.dot(v1_data))
@@ -390,7 +390,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(3)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, 3)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -399,7 +399,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(3, wrap=True)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, 3, wrap=True)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -408,7 +408,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal([1, 2, 3])
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, [1, 2, 3])
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -417,7 +417,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal([1, 2, 3], wrap=True)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, [1, 2, 3], wrap=True)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -427,7 +427,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -437,7 +437,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val, wrap=True)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val, wrap=True)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -447,7 +447,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val)
     np.testing.assert_array_equal(arr.toarray(), expected)
@@ -457,7 +457,7 @@ def test_sparse_fill_diagonal():
     arr = SparseNDArray(s1)
     arr.fill_diagonal(val, wrap=True)
-    expected = s1.copy().A
+    expected = s1.copy().toarray()
     np.fill_diagonal(expected, val, wrap=True)
     np.testing.assert_array_equal(arr.toarray(), expected)

maxframe/lib/wrapped_pickle.py CHANGED Viewed

@@ -120,7 +120,8 @@ class _UnpickleSwitch:
             @functools.wraps(func)
             async def wrapped(*args, **kwargs):
                 with _UnpickleSwitch(forbidden=self._forbidden):
-                    return await func(*args, **kwargs)
+                    ret = await func(*args, **kwargs)
+                return ret
         else:

maxframe/opcodes.py CHANGED Viewed

@@ -392,6 +392,10 @@ PIVOT_TABLE = 744
 FUSE = 801
+# LLM
+DASHSCOPE_TEXT_GENERATION = 810
+DASHSCOPE_MULTI_MODAL_GENERATION = 811
 # table like input for tensor
 TABLE_COO = 1003
 # store tensor as coo format
@@ -532,6 +536,8 @@ STATSMODELS_TRAIN = 3012
 STATSMODELS_PREDICT = 3013
 # learn
+CONNECTED_COMPONENTS = 3100
 # checks
 CHECK_NON_NEGATIVE = 3300
 # classifier check targets
@@ -566,6 +572,8 @@ CHOLESKY_FUSE = 999988
 # MaxFrame-dedicated functions
 DATAFRAME_RESHUFFLE = 10001
+FLATMAP = 10002
+FLATJSON = 10003
 # MaxFrame internal operators
 DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001

maxframe/protocol.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import base64
 import enum
 import uuid
-from typing import Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar
+from typing import Any, Dict, Generic, List, Optional, Type, TypeVar
 import pandas as pd
@@ -32,12 +32,12 @@ from .serialization.serializables import (
     EnumField,
     FieldTypes,
     Float64Field,
+    Int32Field,
     ListField,
     ReferenceField,
     Serializable,
     SeriesField,
     StringField,
-    TupleField,
 )
 pickling_support.install()
@@ -71,6 +71,9 @@ class DagStatus(enum.Enum):
     CANCELLING = 4
     CANCELLED = 5
+    def is_terminated(self):
+        return self in (DagStatus.CANCELLED, DagStatus.SUCCEEDED, DagStatus.FAILED)
 class DimensionIndex(Serializable):
     is_slice: bool = BoolField("is_slice", default=None)
@@ -88,19 +91,6 @@ class DataSerializeType(enum.Enum):
     PICKLE = 0
-class VolumeDataMeta(Serializable):
-    output_type: OutputType = EnumField(
-        "output_type", OutputType, FieldTypes.int8, default=None
-    )
-    serial_type: DataSerializeType = EnumField(
-        "serial_type", DataSerializeType, FieldTypes.int8, default=None
-    )
-    shape: Tuple[int, ...] = TupleField("shape", FieldTypes.int64, default=None)
-    nsplits: Tuple[Tuple[int, ...], ...] = TupleField(
-        "nsplits", FieldTypes.tuple(FieldTypes.tuple(FieldTypes.int64)), default=None
-    )
 _result_type_to_info_cls: Dict[ResultType, Type["ResultInfo"]] = dict()
@@ -150,6 +140,9 @@ class ODPSTableResultInfo(ResultInfo):
     partition_specs: Optional[List[str]] = ListField(
         "partition_specs", FieldTypes.string, default=None
     )
+    table_meta: Optional["DataFrameTableMeta"] = ReferenceField(
+        "table_meta", default=None
+    )
     def __init__(self, result_type: ResultType = None, **kw):
         result_type = result_type or ResultType.ODPS_TABLE
@@ -160,8 +153,17 @@ class ODPSTableResultInfo(ResultInfo):
         ret["full_table_name"] = self.full_table_name
         if self.partition_specs:
             ret["partition_specs"] = self.partition_specs
+        if self.table_meta:
+            ret["table_meta"] = self.table_meta.to_json()
         return ret
+    @classmethod
+    def _json_to_kwargs(cls, serialized: dict) -> dict:
+        kw = super()._json_to_kwargs(serialized)
+        if "table_meta" in kw:
+            kw["table_meta"] = DataFrameTableMeta.from_json(kw["table_meta"])
+        return kw
 class ODPSVolumeResultInfo(ResultInfo):
     _result_type = ResultType.ODPS_VOLUME
@@ -190,9 +192,9 @@ class ErrorInfo(JsonSerializable):
         "error_tracebacks", FieldTypes.list
     )
     raw_error_source: ErrorSource = EnumField(
-        "raw_error_source", ErrorSource, FieldTypes.int8
+        "raw_error_source", ErrorSource, FieldTypes.int8, default=None
     )
-    raw_error_data: Optional[Exception] = AnyField("raw_error_data")
+    raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
     @classmethod
     def from_exception(cls, exc: Exception):
@@ -201,20 +203,29 @@ class ErrorInfo(JsonSerializable):
         return cls(messages, tracebacks, ErrorSource.PYTHON, exc)
     def reraise(self):
-        if self.raw_error_source == ErrorSource.PYTHON:
+        if (
+            self.raw_error_source == ErrorSource.PYTHON
+            and self.raw_error_data is not None
+        ):
             raise self.raw_error_data
         raise RemoteException(self.error_messages, self.error_tracebacks, [])
     @classmethod
     def from_json(cls, serialized: dict) -> "ErrorInfo":
         kw = serialized.copy()
-        kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
+        if kw.get("raw_error_source") is not None:
+            kw["raw_error_source"] = ErrorSource(serialized["raw_error_source"])
+        else:
+            kw["raw_error_source"] = None
         if kw.get("raw_error_data"):
             bufs = [base64.b64decode(s) for s in kw["raw_error_data"]]
             try:
                 kw["raw_error_data"] = pickle.loads(bufs[0], buffers=bufs[1:])
             except:
-                kw["raw_error_data"] = None
+                # both error source and data shall be None to make sure
+                # RemoteException is raised.
+                kw["raw_error_source"] = kw["raw_error_data"] = None
         return cls(**kw)
     def to_json(self) -> dict:
@@ -227,7 +238,12 @@ class ErrorInfo(JsonSerializable):
         if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
             err_data_bufs = self.raw_error_data.get_buffers()
         elif isinstance(self.raw_error_data, BaseException):
-            err_data_bufs = pickle_buffers(self.raw_error_data)
+            try:
+                err_data_bufs = pickle_buffers(self.raw_error_data)
+            except:
+                err_data_bufs = None
+                ret["raw_error_source"] = None
         if err_data_bufs:
             ret["raw_error_data"] = [
                 base64.b64encode(s).decode() for s in err_data_bufs
@@ -249,9 +265,17 @@ class DagInfo(JsonSerializable):
     error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
     start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
     end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
+    subdag_infos: Dict[str, "SubDagInfo"] = DictField(
+        "subdag_infos",
+        key_type=FieldTypes.string,
+        value_type=FieldTypes.reference,
+        default_factory=dict,
+    )
     @classmethod
-    def from_json(cls, serialized: dict) -> "DagInfo":
+    def from_json(cls, serialized: dict) -> Optional["DagInfo"]:
+        if serialized is None:
+            return None
         kw = serialized.copy()
         kw["status"] = DagStatus(kw["status"])
         if kw.get("tileable_to_result_infos"):
@@ -261,6 +285,10 @@ class DagInfo(JsonSerializable):
             }
         if kw.get("error_info"):
             kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
+        if kw.get("subdag_infos"):
+            kw["subdag_infos"] = {
+                k: SubDagInfo.from_json(v) for k, v in kw["subdag_infos"].items()
+            }
         return DagInfo(**kw)
     def to_json(self) -> dict:
@@ -279,6 +307,8 @@ class DagInfo(JsonSerializable):
             }
         if self.error_info:
             ret["error_info"] = self.error_info.to_json()
+        if self.subdag_infos:
+            ret["subdag_infos"] = {k: v.to_json() for k, v in self.subdag_infos.items()}
         return ret
@@ -302,7 +332,9 @@ class SessionInfo(JsonSerializable):
     error_info: Optional[ErrorInfo] = ReferenceField("error_info", default=None)
     @classmethod
-    def from_json(cls, serialized: dict) -> "SessionInfo":
+    def from_json(cls, serialized: dict) -> Optional["SessionInfo"]:
+        if serialized is None:
+            return None
         kw = serialized.copy()
         if kw.get("dag_infos"):
             kw["dag_infos"] = {
@@ -320,7 +352,10 @@ class SessionInfo(JsonSerializable):
             "idle_timestamp": self.idle_timestamp,
         }
         if self.dag_infos:
-            ret["dag_infos"] = {k: v.to_json() for k, v in self.dag_infos.items()}
+            ret["dag_infos"] = {
+                k: v.to_json() if v is not None else None
+                for k, v in self.dag_infos.items()
+            }
         if self.error_info:
             ret["error_info"] = self.error_info.to_json()
         return ret
@@ -340,9 +375,32 @@ class ExecuteDagRequest(Serializable):
         value_type=FieldTypes.reference,
         default=None,
     )
+    new_settings: Dict[str, Any] = DictField(
+        "new_settings",
+        key_type=FieldTypes.string,
+        default=None,
+    )
+class SubDagSubmitInstanceInfo(JsonSerializable):
+    submit_reason: str = StringField("submit_reason")
+    instance_id: str = StringField("instance_id")
+    subquery_id: Optional[int] = Int32Field("subquery_id", default=None)
+    @classmethod
+    def from_json(cls, serialized: dict) -> "SubDagSubmitInstanceInfo":
+        return SubDagSubmitInstanceInfo(**serialized)
+    def to_json(self) -> dict:
+        ret = {
+            "submit_reason": self.submit_reason,
+            "instance_id": self.instance_id,
+            "subquery_id": self.subquery_id,
+        }
+        return ret
-class SubDagInfo(Serializable):
+class SubDagInfo(JsonSerializable):
     subdag_id: str = StringField("subdag_id")
     status: DagStatus = EnumField("status", DagStatus, FieldTypes.int8, default=None)
     progress: float = Float64Field("progress", default=None)
@@ -355,9 +413,52 @@ class SubDagInfo(Serializable):
         FieldTypes.reference,
         default_factory=dict,
     )
+    start_timestamp: Optional[float] = Float64Field("start_timestamp", default=None)
+    end_timestamp: Optional[float] = Float64Field("end_timestamp", default=None)
+    submit_instances: List[SubDagSubmitInstanceInfo] = ListField(
+        "submit_instances",
+        FieldTypes.reference,
+        default_factory=list,
+    )
+    @classmethod
+    def from_json(cls, serialized: dict) -> "SubDagInfo":
+        kw = serialized.copy()
+        kw["status"] = DagStatus(kw["status"])
+        if kw.get("tileable_to_result_infos"):
+            kw["tileable_to_result_infos"] = {
+                k: ResultInfo.from_json(s)
+                for k, s in kw["tileable_to_result_infos"].items()
+            }
+        if kw.get("error_info"):
+            kw["error_info"] = ErrorInfo.from_json(kw["error_info"])
+        if kw.get("submit_instances"):
+            kw["submit_instances"] = [
+                SubDagSubmitInstanceInfo.from_json(s) for s in kw["submit_instances"]
+            ]
+        return SubDagInfo(**kw)
+    def to_json(self) -> dict:
+        ret = {
+            "subdag_id": self.subdag_id,
+            "status": self.status.value,
+            "progress": self.progress,
+            "start_timestamp": self.start_timestamp,
+            "end_timestamp": self.end_timestamp,
+        }
+        if self.error_info:
+            ret["error_info"] = self.error_info.to_json()
+        if self.tileable_to_result_infos:
+            ret["tileable_to_result_infos"] = {
+                k: v.to_json() for k, v in self.tileable_to_result_infos.items()
+            }
+        if self.submit_instances:
+            ret["submit_instances"] = [i.to_json() for i in self.submit_instances]
+        return ret
 class ExecuteSubDagRequest(Serializable):
+    subdag_id: str = StringField("subdag_id")
     dag: TileableGraph = ReferenceField(
         "dag",
         on_serialize=SerializableGraph.from_graph,
@@ -371,7 +472,7 @@ class DecrefRequest(Serializable):
     keys: List[str] = ListField("keys", FieldTypes.string, default=None)
-class DataFrameTableMeta(Serializable):
+class DataFrameTableMeta(JsonSerializable):
     __slots__ = "_pd_column_names", "_pd_index_level_names"
     table_name: Optional[str] = StringField("table_name", default=None)
@@ -402,7 +503,7 @@ class DataFrameTableMeta(Serializable):
             self._pd_index_level_names = self.pd_index_dtypes.index.tolist()
             return self._pd_index_level_names
-    def __eq__(self, other: "Serializable") -> bool:
+    def __eq__(self, other: "DataFrameTableMeta") -> bool:
         if not isinstance(other, type(self)):
             return False
         for k in self._FIELDS:
@@ -413,3 +514,29 @@ class DataFrameTableMeta(Serializable):
             if not is_same:
                 return False
         return True
+    def to_json(self) -> dict:
+        b64_pk = lambda x: base64.b64encode(pickle.dumps(x)).decode()
+        ret = {
+            "table_name": self.table_name,
+            "type": self.type.value,
+            "table_column_names": self.table_column_names,
+            "table_index_column_names": self.table_index_column_names,
+            "pd_column_dtypes": b64_pk(self.pd_column_dtypes),
+            "pd_column_level_names": b64_pk(self.pd_column_level_names),
+            "pd_index_dtypes": b64_pk(self.pd_index_dtypes),
+        }
+        return ret
+    @classmethod
+    def from_json(cls, serialized: dict) -> "DataFrameTableMeta":
+        b64_upk = lambda x: pickle.loads(base64.b64decode(x))
+        serialized.update(
+            {
+                "type": OutputType(serialized["type"]),
+                "pd_column_dtypes": b64_upk(serialized["pd_column_dtypes"]),
+                "pd_column_level_names": b64_upk(serialized["pd_column_level_names"]),
+                "pd_index_dtypes": b64_upk(serialized["pd_index_dtypes"]),
+            }
+        )
+        return DataFrameTableMeta(**serialized)