PyPI - maxframe - Versions diffs - 0.1.0b5__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl - Mend

maxframe 0.1.0b5__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (203) hide show

maxframe/_utils.cp38-win_amd64.pyd +0 -0
maxframe/codegen.py +10 -4
maxframe/config/config.py +68 -10
maxframe/config/validators.py +42 -11
maxframe/conftest.py +58 -14
maxframe/core/__init__.py +2 -16
maxframe/core/entity/__init__.py +1 -12
maxframe/core/entity/executable.py +1 -1
maxframe/core/entity/objects.py +46 -45
maxframe/core/entity/output_types.py +0 -3
maxframe/core/entity/tests/test_objects.py +43 -0
maxframe/core/entity/tileables.py +5 -78
maxframe/core/graph/__init__.py +2 -2
maxframe/core/graph/builder/__init__.py +0 -1
maxframe/core/graph/builder/base.py +5 -4
maxframe/core/graph/builder/tileable.py +4 -4
maxframe/core/graph/builder/utils.py +4 -8
maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
maxframe/core/graph/core.pyx +4 -4
maxframe/core/graph/entity.py +9 -33
maxframe/core/operator/__init__.py +2 -9
maxframe/core/operator/base.py +3 -5
maxframe/core/operator/objects.py +0 -9
maxframe/core/operator/utils.py +55 -0
maxframe/dataframe/__init__.py +1 -1
maxframe/dataframe/arithmetic/around.py +5 -17
maxframe/dataframe/arithmetic/core.py +15 -7
maxframe/dataframe/arithmetic/docstring.py +7 -33
maxframe/dataframe/arithmetic/equal.py +4 -2
maxframe/dataframe/arithmetic/greater.py +4 -2
maxframe/dataframe/arithmetic/greater_equal.py +4 -2
maxframe/dataframe/arithmetic/less.py +2 -2
maxframe/dataframe/arithmetic/less_equal.py +4 -2
maxframe/dataframe/arithmetic/not_equal.py +4 -2
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
maxframe/dataframe/core.py +31 -7
maxframe/dataframe/datasource/date_range.py +2 -2
maxframe/dataframe/datasource/read_odps_query.py +117 -23
maxframe/dataframe/datasource/read_odps_table.py +6 -3
maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
maxframe/dataframe/datastore/to_odps.py +28 -0
maxframe/dataframe/extensions/__init__.py +5 -0
maxframe/dataframe/extensions/flatjson.py +131 -0
maxframe/dataframe/extensions/flatmap.py +317 -0
maxframe/dataframe/extensions/reshuffle.py +1 -1
maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
maxframe/dataframe/groupby/core.py +1 -1
maxframe/dataframe/groupby/cum.py +0 -1
maxframe/dataframe/groupby/fill.py +4 -1
maxframe/dataframe/groupby/getitem.py +6 -0
maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
maxframe/dataframe/groupby/transform.py +5 -1
maxframe/dataframe/indexing/align.py +1 -1
maxframe/dataframe/indexing/loc.py +6 -4
maxframe/dataframe/indexing/rename.py +5 -28
maxframe/dataframe/indexing/sample.py +0 -1
maxframe/dataframe/indexing/set_index.py +68 -1
maxframe/dataframe/initializer.py +11 -1
maxframe/dataframe/merge/__init__.py +9 -1
maxframe/dataframe/merge/concat.py +41 -31
maxframe/dataframe/merge/merge.py +237 -3
maxframe/dataframe/merge/tests/test_merge.py +126 -1
maxframe/dataframe/misc/apply.py +5 -10
maxframe/dataframe/misc/case_when.py +1 -1
maxframe/dataframe/misc/describe.py +2 -2
maxframe/dataframe/misc/drop_duplicates.py +8 -8
maxframe/dataframe/misc/eval.py +4 -0
maxframe/dataframe/misc/memory_usage.py +2 -2
maxframe/dataframe/misc/pct_change.py +1 -83
maxframe/dataframe/misc/tests/test_misc.py +33 -2
maxframe/dataframe/misc/transform.py +1 -30
maxframe/dataframe/misc/value_counts.py +4 -17
maxframe/dataframe/missing/dropna.py +1 -1
maxframe/dataframe/missing/fillna.py +5 -5
maxframe/dataframe/operators.py +1 -17
maxframe/dataframe/reduction/core.py +2 -2
maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
maxframe/dataframe/sort/sort_values.py +1 -11
maxframe/dataframe/statistics/corr.py +3 -3
maxframe/dataframe/statistics/quantile.py +13 -19
maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
maxframe/dataframe/tests/test_initializer.py +33 -2
maxframe/dataframe/utils.py +26 -11
maxframe/dataframe/window/expanding.py +5 -3
maxframe/dataframe/window/tests/test_expanding.py +2 -2
maxframe/errors.py +13 -0
maxframe/extension.py +12 -0
maxframe/io/__init__.py +13 -0
maxframe/io/objects/__init__.py +24 -0
maxframe/io/objects/core.py +140 -0
maxframe/io/objects/tensor.py +76 -0
maxframe/io/objects/tests/__init__.py +13 -0
maxframe/io/objects/tests/test_object_io.py +97 -0
maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
maxframe/{odpsio → io/odpsio}/schema.py +38 -16
maxframe/io/odpsio/tableio.py +719 -0
maxframe/io/odpsio/tests/__init__.py +13 -0
maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
maxframe/io/odpsio/volumeio.py +63 -0
maxframe/learn/contrib/__init__.py +3 -1
maxframe/learn/contrib/graph/__init__.py +15 -0
maxframe/learn/contrib/graph/connected_components.py +215 -0
maxframe/learn/contrib/graph/tests/__init__.py +13 -0
maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
maxframe/learn/contrib/llm/__init__.py +16 -0
maxframe/learn/contrib/llm/core.py +54 -0
maxframe/learn/contrib/llm/models/__init__.py +14 -0
maxframe/learn/contrib/llm/models/dashscope.py +73 -0
maxframe/learn/contrib/llm/multi_modal.py +42 -0
maxframe/learn/contrib/llm/text.py +42 -0
maxframe/learn/contrib/xgboost/classifier.py +26 -2
maxframe/learn/contrib/xgboost/core.py +87 -2
maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
maxframe/learn/contrib/xgboost/predict.py +29 -46
maxframe/learn/contrib/xgboost/regressor.py +3 -10
maxframe/learn/contrib/xgboost/train.py +29 -18
maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
maxframe/lib/mmh3.pyi +43 -0
maxframe/lib/sparse/tests/test_sparse.py +15 -15
maxframe/lib/wrapped_pickle.py +2 -1
maxframe/opcodes.py +8 -0
maxframe/protocol.py +154 -27
maxframe/remote/core.py +4 -8
maxframe/serialization/__init__.py +1 -0
maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -0
maxframe/serialization/core.pyx +67 -26
maxframe/serialization/exception.py +1 -1
maxframe/serialization/pandas.py +52 -17
maxframe/serialization/serializables/core.py +180 -15
maxframe/serialization/serializables/field_type.py +4 -1
maxframe/serialization/serializables/tests/test_serializable.py +54 -5
maxframe/serialization/tests/test_serial.py +2 -1
maxframe/session.py +9 -2
maxframe/tensor/__init__.py +81 -2
maxframe/tensor/arithmetic/isclose.py +1 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
maxframe/tensor/core.py +5 -136
maxframe/tensor/datasource/array.py +3 -0
maxframe/tensor/datasource/full.py +1 -1
maxframe/tensor/datasource/tests/test_datasource.py +1 -1
maxframe/tensor/indexing/flatnonzero.py +1 -1
maxframe/tensor/indexing/getitem.py +2 -0
maxframe/tensor/merge/__init__.py +2 -0
maxframe/tensor/merge/concatenate.py +101 -0
maxframe/tensor/merge/tests/test_merge.py +30 -1
maxframe/tensor/merge/vstack.py +74 -0
maxframe/tensor/{base → misc}/__init__.py +2 -0
maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
maxframe/tensor/misc/atleast_2d.py +70 -0
maxframe/tensor/misc/atleast_3d.py +85 -0
maxframe/tensor/misc/tests/__init__.py +13 -0
maxframe/tensor/{base → misc}/transpose.py +22 -18
maxframe/tensor/{base → misc}/unique.py +3 -3
maxframe/tensor/operators.py +1 -7
maxframe/tensor/random/core.py +1 -1
maxframe/tensor/reduction/count_nonzero.py +2 -1
maxframe/tensor/reduction/mean.py +1 -0
maxframe/tensor/reduction/nanmean.py +1 -0
maxframe/tensor/reduction/nanvar.py +2 -0
maxframe/tensor/reduction/tests/test_reduction.py +12 -1
maxframe/tensor/reduction/var.py +2 -0
maxframe/tensor/statistics/quantile.py +2 -2
maxframe/tensor/utils.py +2 -22
maxframe/tests/test_protocol.py +34 -0
maxframe/tests/test_utils.py +0 -12
maxframe/tests/utils.py +17 -2
maxframe/typing_.py +4 -1
maxframe/udf.py +8 -9
maxframe/utils.py +106 -86
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
maxframe_client/__init__.py +0 -1
maxframe_client/clients/framedriver.py +4 -1
maxframe_client/fetcher.py +81 -74
maxframe_client/session/consts.py +3 -0
maxframe_client/session/graph.py +8 -2
maxframe_client/session/odps.py +194 -40
maxframe_client/session/task.py +94 -39
maxframe_client/tests/test_fetcher.py +21 -3
maxframe_client/tests/test_session.py +109 -8
maxframe/core/entity/chunks.py +0 -68
maxframe/core/entity/fuse.py +0 -73
maxframe/core/graph/builder/chunk.py +0 -430
maxframe/odpsio/tableio.py +0 -322
maxframe/odpsio/volumeio.py +0 -95
maxframe_client/clients/spe.py +0 -104
/maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
/maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
/maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
/maxframe/tensor/{base → misc}/astype.py +0 -0
/maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
/maxframe/tensor/{base → misc}/ravel.py +0 -0
/maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
/maxframe/tensor/{base → misc}/where.py +0 -0
{maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/window/tests/test_expanding.py CHANGED Viewed

@@ -29,8 +29,8 @@ def test_expanding():
     with pytest.raises(NotImplementedError):
         _ = df2.expanding(3, axis=1)
-    r = df2.expanding(3, center=False)
-    expected = df.expanding(3, center=False)
+    r = df2.expanding(3)
+    expected = df.expanding(3)
     assert repr(r) == repr(expected)
     assert "b" in dir(r)

maxframe/errors.py CHANGED Viewed

@@ -17,5 +17,18 @@ class MaxFrameError(Exception):
     pass
+class MaxFrameIntentionalError(MaxFrameError):
+    pass
 class MaxFrameUserError(MaxFrameError):
     pass
+class NoTaskServerResponseError(MaxFrameError):
+    pass
+class SessionAlreadyClosedError(MaxFrameError):
+    def __init__(self, session_id: str):
+        super().__init__(f"Session {session_id} is already closed")

maxframe/extension.py CHANGED Viewed

@@ -48,6 +48,18 @@ class MaxFrameExtension(metaclass=abc.ABCMeta):
         """
         pass
+    @classmethod
+    async def reload_session(cls, session_id: str) -> None:
+        """
+        Reload the session state when the session is recovered from failover.
+        Parameters
+        ----------
+        session_id : str
+            The session id.
+        """
+        pass
     @classmethod
     def init_service_extension(cls) -> None:
         """

maxframe/io/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

maxframe/io/objects/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .core import (
+    AbstractObjectIOHandler,
+    get_object_io_handler,
+    register_object_io_handler,
+)
+# isort: off
+from . import tensor
+del tensor

maxframe/io/objects/core.py ADDED Viewed

@@ -0,0 +1,140 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABCMeta, abstractmethod
+from typing import Any, Dict, Type, Union
+import msgpack
+from ...core import Entity, EntityData
+from ...core.entity import ObjectData, TileableData
+from ...lib import wrapped_pickle as pickle
+from ...typing_ import SlicesType, TileableType
+from ...utils import TypeDispatcher
+from ..odpsio.volumeio import ODPSVolumeReader, ODPSVolumeWriter
+_MetaType = Dict[str, Any]
+_META_FILE_NAME = ".meta"
+_META_PICKLED_KEYS_KEY = ".pickled_keys"
+_io_handler_dispatcher = TypeDispatcher()
+def register_object_io_handler(tileable_data_type: Type[TileableData]):
+    def wrapper(handler_cls):
+        _io_handler_dispatcher.register(tileable_data_type, handler_cls)
+        return handler_cls
+    return wrapper
+def get_object_io_handler(
+    tileable_data_type: Union[Entity, EntityData, Type[EntityData]]
+) -> Type["AbstractObjectIOHandler"]:
+    if not isinstance(tileable_data_type, type):
+        if isinstance(tileable_data_type, Entity):
+            tileable_data_type = tileable_data_type.data
+        tileable_data_type = type(tileable_data_type)
+    return _io_handler_dispatcher.get_handler(tileable_data_type)
+class AbstractObjectIOHandler(metaclass=ABCMeta):
+    def _prepare_meta_for_serial(
+        self, tileable: TileableType, meta: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        to_pack = meta.copy()
+        pickled_keys = []
+        for k, v in meta.items():
+            if not isinstance(v, (str, bytes, int, float, bool)):
+                to_pack[k] = pickle.dumps(v)
+                pickled_keys.append(k)
+        to_pack[".pickled_keys"] = pickled_keys
+        return to_pack
+    def _prepare_meta_for_deserial(
+        self, tileable: TileableType, meta: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        pickled_keys = meta.pop(".pickled_keys", None) or []
+        for k in pickled_keys:
+            meta[k] = pickle.loads(meta[k])
+        return meta
+    def read_object_meta(
+        self, reader: ODPSVolumeReader, tileable: TileableType
+    ) -> Dict[str, Any]:
+        meta_obj = msgpack.loads(reader.read_file(_META_FILE_NAME))
+        return self._prepare_meta_for_deserial(tileable, meta_obj)
+    @abstractmethod
+    def _read_object_body(
+        self,
+        reader: ODPSVolumeReader,
+        tileable: TileableType,
+        meta: Dict[str, Any],
+        slices: SlicesType = None,
+    ) -> Any:
+        raise NotImplementedError
+    def read_object(
+        self,
+        reader: ODPSVolumeReader,
+        tileable: TileableType,
+        slices: SlicesType = None,
+    ) -> Any:
+        meta = self.read_object_meta(reader, tileable)
+        return self._read_object_body(reader, tileable, meta, slices)
+    @abstractmethod
+    def _write_object_body(
+        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
+    ):
+        raise NotImplementedError
+    def write_object_meta(
+        self,
+        writer: ODPSVolumeWriter,
+        tileable: TileableType,
+        extra_meta: Dict[str, Any] = None,
+    ):
+        meta_obj = tileable.params.copy()
+        if extra_meta:
+            meta_obj.update(extra_meta)
+        meta_obj = self._prepare_meta_for_serial(tileable, meta_obj)
+        packed = msgpack.dumps(meta_obj)
+        writer.write_file(_META_FILE_NAME, packed)
+    def write_object(
+        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
+    ):
+        self.write_object_meta(writer, tileable)
+        self._write_object_body(writer, tileable, value)
+@register_object_io_handler(ObjectData)
+class ObjectIOHandler(AbstractObjectIOHandler):
+    def _read_object_body(
+        self,
+        reader: ODPSVolumeReader,
+        tileable: TileableType,
+        meta: Dict[str, Any],
+        slices: SlicesType = None,
+    ) -> Any:
+        return pickle.loads(reader.read_file("data"))
+    def _write_object_body(
+        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
+    ):
+        writer.write_file("data", pickle.dumps(value))

maxframe/io/objects/tensor.py ADDED Viewed

@@ -0,0 +1,76 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import struct
+from io import BytesIO
+from typing import Any, Dict
+import msgpack
+import numpy as np
+from ...lib import wrapped_pickle as pickle
+from ...tensor.core import TensorData
+from ...typing_ import SlicesType, TileableType
+from ..odpsio import ODPSVolumeReader, ODPSVolumeWriter
+from .core import AbstractObjectIOHandler, register_object_io_handler
+@register_object_io_handler(TensorData)
+class TensorIOHandler(AbstractObjectIOHandler):
+    def write_object_meta(
+        self,
+        writer: ODPSVolumeWriter,
+        tileable: TileableType,
+        extra_meta: Dict[str, Any] = None,
+    ):
+        # fixme upload in real slices when tensors are supported in DPE
+        extra_meta = extra_meta or dict()
+        extra_meta["nsplits"] = ((np.nan,),)
+        super().write_object_meta(writer, tileable, extra_meta=extra_meta)
+    def _read_object_body(
+        self,
+        reader: ODPSVolumeReader,
+        tileable: TileableType,
+        meta: Dict[str, Any],
+        slices: SlicesType = None,
+    ) -> Any:
+        # fixme read data with slices when tensors are supported in DPE
+        body = reader.read_file("0,0.dat")
+        bio = BytesIO(body)
+        (header_len,) = struct.unpack("<I", bio.read(4))
+        header_data = msgpack.loads(bio.read(header_len))
+        pickled = bio.read(header_data[0])
+        bufs = [bio.read(size) for size in header_data[1:]]
+        return pickle.loads(pickled, buffers=bufs)
+    def _write_object_body(
+        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
+    ):
+        # fixme upload in real slices when tensors are supported in DPE
+        def data_gen():
+            bufs = []
+            pickled = pickle.dumps(value, buffer_callback=bufs.append)
+            header_data = msgpack.dumps(
+                [len(pickled)] + [len(buf.raw()) for buf in bufs]
+            )
+            yield struct.pack("<I", len(header_data))
+            yield header_data
+            yield pickled
+            for buf in bufs:
+                yield buf
+        writer.write_file("0,0.dat", data_gen())

maxframe/io/objects/tests/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

maxframe/io/objects/tests/test_object_io.py ADDED Viewed

@@ -0,0 +1,97 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pytest
+from odps import ODPS
+from ....core import OutputType
+from ....core.operator import ObjectOperatorMixin, Operator
+from ....tensor.datasource import ArrayDataSource
+from ....tests.utils import tn
+from ...odpsio import ODPSVolumeReader, ODPSVolumeWriter
+from ..core import get_object_io_handler
+class TestObjectOp(Operator, ObjectOperatorMixin):
+    def __call__(self):
+        self._output_types = [OutputType.object]
+        return self.new_tileable([])
+@pytest.fixture(scope="module")
+def create_volume(request, oss_config):
+    test_vol_name = tn("test_object_io_volume")
+    odps_entry = ODPS.from_environments()
+    try:
+        odps_entry.delete_volume(test_vol_name, auto_remove_dir=True, recursive=True)
+    except:
+        pass
+    oss_test_dir_name = tn("test_oss_directory")
+    if oss_config is None:
+        pytest.skip("Need oss and its config to run this test")
+    (
+        oss_access_id,
+        oss_secret_access_key,
+        oss_bucket_name,
+        oss_endpoint,
+    ) = oss_config.oss_config
+    test_location = "oss://%s:%s@%s/%s/%s" % (
+        oss_access_id,
+        oss_secret_access_key,
+        oss_endpoint,
+        oss_bucket_name,
+        oss_test_dir_name,
+    )
+    oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
+    odps_entry.create_external_volume(test_vol_name, location=test_location)
+    try:
+        yield test_vol_name
+    finally:
+        try:
+            odps_entry.delete_volume(
+                test_vol_name, auto_remove_dir=True, recursive=True
+            )
+        except:
+            pass
+def test_simple_object_io(create_volume):
+    obj = TestObjectOp()()
+    data = "abcdefg"
+    odps_entry = ODPS.from_environments()
+    reader = ODPSVolumeReader(odps_entry, create_volume, obj.key)
+    writer = ODPSVolumeWriter(odps_entry, create_volume, obj.key)
+    handler = get_object_io_handler(obj)()
+    handler.write_object(writer, obj, data)
+    assert data == handler.read_object(reader, obj)
+def test_tensor_object_io(create_volume):
+    data = np.array([[4, 9, 2], [3, 5, 7], [8, 1, 6]])
+    obj = ArrayDataSource(data, dtype=data.dtype)(data.shape)
+    odps_entry = ODPS.from_environments()
+    reader = ODPSVolumeReader(odps_entry, create_volume, obj.key)
+    writer = ODPSVolumeWriter(odps_entry, create_volume, obj.key)
+    handler = get_object_io_handler(obj)()
+    handler.write_object(writer, obj, data)
+    np.testing.assert_equal(data, handler.read_object(reader, obj))

maxframe/{odpsio → io/odpsio}/__init__.py RENAMED Viewed

@@ -14,8 +14,10 @@
 from .arrow import arrow_to_pandas, pandas_to_arrow
 from .schema import (
+    arrow_schema_to_odps_schema,
     build_dataframe_table_meta,
     odps_schema_to_pandas_dtypes,
     pandas_to_odps_schema,
 )
-from .tableio import HaloTableIO
+from .tableio import HaloTableIO, ODPSTableIO
+from .volumeio import ODPSVolumeReader, ODPSVolumeWriter

maxframe/{odpsio → io/odpsio}/arrow.py RENAMED Viewed

@@ -17,10 +17,10 @@ from typing import Any, Tuple, Union
 import pandas as pd
 import pyarrow as pa
-from ..core import OutputType
-from ..protocol import DataFrameTableMeta
-from ..tensor.core import TENSOR_TYPE
-from ..typing_ import ArrowTableType, PandasObjectTypes
+from ...core import OutputType
+from ...protocol import DataFrameTableMeta
+from ...tensor.core import TENSOR_TYPE
+from ...typing_ import ArrowTableType, PandasObjectTypes
 from .schema import build_dataframe_table_meta
@@ -45,9 +45,13 @@ def _rebuild_dataframe(
 def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index:
     if df.shape[1] > 1:
-        df.columns = pd.Index(table_meta.pd_index_level_names)
-        return pd.MultiIndex.from_frame(df)
-    return pd.Index(df.iloc[:, 0], name=table_meta.pd_index_level_names[0])
+        idx = pd.MultiIndex.from_frame(df)
+        idx.names = table_meta.pd_index_level_names
+    else:
+        # make sure even if None names are updated properly
+        idx = pd.Index(df.iloc[:, 0])
+        idx.name = table_meta.pd_index_level_names[0]
+    return idx
 def arrow_to_pandas(
@@ -65,20 +69,37 @@ def arrow_to_pandas(
 def pandas_to_arrow(
-    df: Any, nthreads=1, ignore_index=False
+    df: Any, nthreads=1, ignore_index=False, ms_cols=None
 ) -> Tuple[ArrowTableType, DataFrameTableMeta]:
     table_meta = build_dataframe_table_meta(df, ignore_index)
     df = df.copy() if callable(getattr(df, "copy", None)) else df
+    table_datetime_cols = None
     if table_meta.type in (OutputType.dataframe, OutputType.series):
         if table_meta.type == OutputType.series:
             df = df.to_frame("_data" if df.name is None else df.name)
+            if ms_cols:
+                table_datetime_cols = {"_data"}
+        elif ms_cols:
+            ms_col_set = set(ms_cols)
+            table_datetime_cols = set()
+            for pd_col, table_col in zip(
+                table_meta.pd_column_dtypes.keys(), table_meta.table_column_names
+            ):
+                if pd_col in ms_col_set:
+                    table_datetime_cols.add(table_col)
         df.columns = pd.Index(table_meta.table_column_names)
         if not ignore_index:
             df = df.rename_axis(table_meta.table_index_column_names).reset_index()
-    elif ignore_index:
+    elif ignore_index and table_meta.type != OutputType.index:
         df = pd.DataFrame([], columns=[])
     elif table_meta.type == OutputType.index:
         names = [f"_idx_{idx}" for idx in range(len(df.names))]
+        table_datetime_cols = set()
+        if ms_cols:
+            if isinstance(df, pd.MultiIndex):
+                table_datetime_cols = {f"_idx_{idx}" for idx in ms_cols}
+            else:
+                table_datetime_cols = {"_idx_0"}
         df = df.to_frame(name=names[0] if len(names) == 1 else names)
     elif table_meta.type == OutputType.scalar:
         names = ["_idx_0"]
@@ -88,4 +109,15 @@ def pandas_to_arrow(
             df = pd.DataFrame([[df]], columns=names)
     else:  # this could never happen  # pragma: no cover
         raise ValueError(f"Does not support meta type {table_meta.type!r}")
-    return pa.Table.from_pandas(df, nthreads=nthreads, preserve_index=False), table_meta
+    pa_table = pa.Table.from_pandas(df, nthreads=nthreads, preserve_index=False)
+    if table_datetime_cols:
+        col_names = pa_table.schema.names
+        col_datas = []
+        for idx, col_name in enumerate(pa_table.schema.names):
+            if col_name not in table_datetime_cols:
+                col_datas.append(pa_table.column(idx))
+                continue
+            col_data = pa_table.column(idx).cast(pa.timestamp("ms"))
+            col_datas.append(col_data)
+        pa_table = pa.Table.from_arrays(col_datas, names=col_names)
+    return pa_table, table_meta

maxframe/{odpsio → io/odpsio}/schema.py RENAMED Viewed

@@ -16,14 +16,15 @@ import string
 from collections import defaultdict
 from typing import Any, Dict, Tuple
+import numpy as np
 import pandas as pd
 import pyarrow as pa
 from odps import types as odps_types
 from pandas.api import types as pd_types
-from ..core import TILEABLE_TYPE, OutputType
-from ..protocol import DataFrameTableMeta
-from ..tensor.core import TENSOR_TYPE
+from ...core import TILEABLE_TYPE, OutputType
+from ...protocol import DataFrameTableMeta
+from ...tensor.core import TENSOR_TYPE
 _TEMP_TABLE_PREFIX = "tmp_mf_"
@@ -39,6 +40,7 @@ _arrow_to_odps_types = {
     pa.float64(): odps_types.double,
     pa.date32(): odps_types.date,
     pa.timestamp("ms"): odps_types.datetime,
+    pa.timestamp("us"): odps_types.timestamp,
     pa.timestamp("ns"): odps_types.timestamp,
 }
@@ -54,7 +56,9 @@ _odps_type_to_arrow = {
     odps_types.double: pa.float64(),
     odps_types.date: pa.date32(),
     odps_types.datetime: pa.timestamp("ms"),
+    odps_types.json: pa.string(),
     odps_types.timestamp: pa.timestamp("ns"),
+    odps_types.timestamp_ntz: pa.timestamp("ns"),
 }
@@ -126,10 +130,15 @@ def odps_type_to_arrow_type(
             ]
             col_type = pa.struct(fields)
         elif isinstance(odps_type, odps_types.Decimal):
-            col_type = pa.decimal128(
-                odps_type.precision or odps_types.Decimal._max_precision,
-                odps_type.scale or odps_types.Decimal._max_scale,
-            )
+            if odps_type.name == "decimal":
+                # legacy decimal data without precision or scale
+                # precision data from internal compat mode
+                col_type = pa.decimal128(38, 18)
+            else:
+                col_type = pa.decimal128(
+                    odps_type.precision or odps_types.Decimal._max_precision,
+                    odps_type.scale or odps_types.Decimal._max_scale,
+                )
         elif isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
             col_type = pa.string()
         else:
@@ -161,7 +170,7 @@ def odps_schema_to_pandas_dtypes(
     return arrow_schema.empty_table().to_pandas().dtypes
-def _is_scalar_object(df_obj: Any) -> bool:
+def is_scalar_object(df_obj: Any) -> bool:
     return (
         isinstance(df_obj, TENSOR_TYPE) and df_obj.shape == ()
     ) or pd_types.is_scalar(df_obj)
@@ -179,10 +188,10 @@ def pandas_to_odps_schema(
     unknown_as_string: bool = False,
     ignore_index=False,
 ) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
-    from .. import dataframe as md
+    from ... import dataframe as md
     from .arrow import pandas_to_arrow
-    if _is_scalar_object(df_obj):
+    if is_scalar_object(df_obj):
         empty_index = None
     elif hasattr(df_obj, "index_value"):
         empty_index = df_obj.index_value.to_pandas()[:0]
@@ -198,20 +207,35 @@ def pandas_to_odps_schema(
     else:
         empty_columns = None
+    ms_cols = None
     if isinstance(df_obj, (md.DataFrame, pd.DataFrame)):
         empty_df_obj = pd.DataFrame(
             [], columns=empty_columns, index=empty_index
         ).astype(df_obj.dtypes)
+        ms_cols = [
+            col for col, dt in df_obj.dtypes.items() if dt == np.dtype("datetime64[ms]")
+        ]
     elif isinstance(df_obj, (md.Series, pd.Series)):
         empty_df_obj = pd.Series([], name=df_obj.name, index=empty_index).astype(
             df_obj.dtype
         )
+        ms_cols = df_obj.dtype == np.dtype("datetime64[ms]")
     elif isinstance(df_obj, (md.Index, pd.Index)):
         empty_df_obj = empty_index
+        if isinstance(empty_index, pd.MultiIndex):
+            ms_cols = [
+                idx
+                for idx, dt in enumerate(empty_index.dtypes.values)
+                if dt == np.dtype("datetime64[ms]")
+            ]
+        else:
+            ms_cols = df_obj.dtype == np.dtype("datetime64[ms]")
     else:
         empty_df_obj = df_obj
-    arrow_data, table_meta = pandas_to_arrow(empty_df_obj, ignore_index=ignore_index)
+    arrow_data, table_meta = pandas_to_arrow(
+        empty_df_obj, ignore_index=ignore_index, ms_cols=ms_cols
+    )
     return (
         arrow_schema_to_odps_schema(
             arrow_data.schema, unknown_as_string=unknown_as_string
@@ -273,7 +297,7 @@ def build_table_column_name(
 def build_dataframe_table_meta(
     df_obj: Any, ignore_index: bool = False
 ) -> DataFrameTableMeta:
-    from .. import dataframe as md
+    from ... import dataframe as md
     col_to_count = defaultdict(lambda: 0)
     col_to_idx = defaultdict(lambda: 0)
@@ -284,13 +308,11 @@ def build_dataframe_table_meta(
         obj_type = OutputType.series
     elif isinstance(df_obj, (md.Index, pd.Index)):
         obj_type = OutputType.index
-    elif _is_scalar_object(df_obj):
+    elif is_scalar_object(df_obj):
         obj_type = OutputType.scalar
     else:  # pragma: no cover
         raise TypeError(f"Cannot accept type {type(df_obj)}")
-    assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
     if obj_type == OutputType.scalar:
         pd_dtypes = pd.Series([])
         column_index_names = []
@@ -346,7 +368,7 @@ def build_dataframe_table_meta(
     else:
         index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
-    if ignore_index:
+    if ignore_index and obj_type != OutputType.index:
         table_index_column_names = []
         pd_index_dtypes = pd.Series([], index=[])
     else: