maxframe-1.0.0rc1-cp38-cp38-macosx_10_9_universal2.whl → maxframe-1.0.0rc3-cp38-cp38-macosx_10_9_universal2.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Potentially problematic release: this version of maxframe might be problematic.
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/codegen.py +3 -6
- maxframe/config/config.py +49 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +15 -2
- maxframe/core/__init__.py +2 -13
- maxframe/core/entity/__init__.py +0 -4
- maxframe/core/entity/objects.py +46 -3
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +5 -55
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
- maxframe/dataframe/core.py +5 -5
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +7 -1
- maxframe/dataframe/datasource/read_odps_table.py +3 -2
- maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
- maxframe/dataframe/datastore/to_odps.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/rename.py +3 -37
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/merge/merge.py +236 -2
- maxframe/dataframe/merge/tests/test_merge.py +123 -0
- maxframe/dataframe/misc/apply.py +3 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +4 -25
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/quantile.py +5 -17
- maxframe/dataframe/utils.py +4 -7
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
- maxframe/{odpsio → io/odpsio}/schema.py +15 -12
- maxframe/io/odpsio/tableio.py +702 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +57 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +21 -7
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +27 -17
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/protocol.py +41 -17
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/serializables/core.py +48 -9
- maxframe/tensor/__init__.py +69 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +98 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +70 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +2 -2
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -0
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/utils.py +11 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +32 -70
- {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +25 -25
- {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
- {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
- maxframe_client/fetcher.py +60 -68
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +58 -22
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +27 -4
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
maxframe/core/graph/builder/tileable.py CHANGED

@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Generator
+from typing import Generator
 
 from ...mode import enter_mode
-from ..entity import
+from ..entity import TileableGraph
 from .base import AbstractGraphBuilder
 
 
@@ -26,9 +26,9 @@ class TileableGraphBuilder(AbstractGraphBuilder):
         super().__init__(graph=graph)
 
     @enter_mode(build=True, kernel=True)
-    def _build(self) ->
+    def _build(self) -> TileableGraph:
         self._add_nodes(self._graph, list(self._graph.result_tileables), set())
         return self._graph
 
-    def build(self) -> Generator[
+    def build(self) -> Generator[TileableGraph, None, None]:
         yield self._build()
maxframe/core/graph/builder/utils.py CHANGED

@@ -13,12 +13,11 @@
 # limitations under the License.
 
 import itertools
-from typing import List
+from typing import List
 
 from ....typing_ import TileableType
 from ...mode import enter_mode
-from ..entity import
-from .chunk import ChunkGraphBuilder
+from ..entity import EntityGraph, TileableGraph
 from .tileable import TileableGraphBuilder
 
 
@@ -28,14 +27,11 @@ def build_graph(
     tile: bool = False,
     fuse_enabled: bool = True,
     **chunk_graph_build_kwargs
-) ->
+) -> EntityGraph:
     tileables = list(itertools.chain(*(tileable.op.outputs for tileable in tileables)))
     tileable_graph = TileableGraph(tileables)
     tileable_graph_builder = TileableGraphBuilder(tileable_graph)
     tileable_graph = next(tileable_graph_builder.build())
     if not tile:
         return tileable_graph
-
-        tileable_graph, fuse_enabled=fuse_enabled, **chunk_graph_build_kwargs
-    )
-    return next(chunk_graph_builder.build())
+    raise NotImplementedError

maxframe/core/graph/core.cpython-38-darwin.so CHANGED
Binary file
maxframe/core/graph/entity.py CHANGED

@@ -13,9 +13,9 @@
 # limitations under the License.
 
 from abc import ABCMeta, abstractmethod
-from typing import Dict, Iterable, List
+from typing import Dict, Iterable, List
 
-from ...core import
+from ...core import Tileable
 from ...serialization.core import buffered
 from ...serialization.serializables import BoolField, DictField, ListField, Serializable
 from ...serialization.serializables.core import SerializableSerializer
@@ -97,26 +97,6 @@ class TileableGraph(EntityGraph, Iterable[Tileable]):
         return self._logic_key
 
 
-class ChunkGraph(EntityGraph, Iterable[Chunk]):
-    _result_chunks: List[Chunk]
-
-    def __init__(self, result_chunks: List[Chunk] = None):
-        super().__init__()
-        self._result_chunks = result_chunks
-
-    @property
-    def result_chunks(self):
-        return self._result_chunks
-
-    @property
-    def results(self):
-        return self._result_chunks
-
-    @results.setter
-    def results(self, new_results):
-        self._result_chunks = new_results
-
-
 class SerializableGraph(Serializable):
     _is_chunk = BoolField("is_chunk")
     # TODO(qinxuye): remove this logic when we handle fetch elegantly,
@@ -132,12 +112,11 @@ class SerializableGraph(Serializable):
     _results = ListField("results")
 
     @classmethod
-    def from_graph(cls, graph:
+    def from_graph(cls, graph: EntityGraph) -> "SerializableGraph":
         from ..operator import Fetch
 
-        is_chunk = isinstance(graph, ChunkGraph)
         return SerializableGraph(
-            _is_chunk=
+            _is_chunk=False,
            _fetch_nodes=[chunk for chunk in graph if isinstance(chunk.op, Fetch)],
            _nodes=graph._nodes,
            _predecessors=graph._predecessors,
@@ -145,9 +124,8 @@ class SerializableGraph(Serializable):
            _results=graph.results,
        )
 
-    def to_graph(self) ->
-
-        graph = graph_cls(self._results)
+    def to_graph(self) -> EntityGraph:
+        graph = TileableGraph(self._results)
         graph._nodes.update(self._nodes)
         graph._predecessors.update(self._predecessors)
         graph._successors.update(self._successors)
@@ -156,14 +134,12 @@ class SerializableGraph(Serializable):
 
 class GraphSerializer(SerializableSerializer):
     @buffered
-    def serial(self, obj:
+    def serial(self, obj: EntityGraph, context: Dict):
         serializable_graph = SerializableGraph.from_graph(obj)
         return [], [serializable_graph], False
 
-    def deserial(
-
-    ) -> Union[TileableGraph, ChunkGraph]:
-        serializable_graph: SerializableGraph = subs[0]
+    def deserial(self, serialized: List, context: Dict, subs: List) -> TileableGraph:
+        serializable_graph: EntityGraph = subs[0]
         return serializable_graph.to_graph()
 
 
maxframe/core/operator/__init__.py CHANGED

@@ -22,13 +22,6 @@ from .base import (
 )
 from .core import TileableOperatorMixin, estimate_size, execute
 from .fetch import Fetch, FetchMixin, FetchShuffle, ShuffleFetchType
-from .
-from .objects import (
-    MergeDictOperator,
-    ObjectFetch,
-    ObjectFuseChunk,
-    ObjectFuseChunkMixin,
-    ObjectOperator,
-    ObjectOperatorMixin,
-)
+from .objects import MergeDictOperator, ObjectFetch, ObjectOperator, ObjectOperatorMixin
 from .shuffle import MapReduceOperator, ShuffleProxy
+from .utils import add_fetch_builder, build_fetch
maxframe/core/operator/base.py CHANGED

@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import functools
 import weakref
 from copy import deepcopy
 from enum import Enum
-from functools import partial
+from functools import lru_cache, partial
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 from ...serialization.core import Placeholder
@@ -37,7 +36,6 @@ from ...serialization.serializables.core import SerializableSerializer
 from ...typing_ import OperatorType
 from ...utils import AttributeDict, classproperty, get_user_call_point, tokenize
 from ..base import Base
-from ..entity.chunks import Chunk
 from ..entity.core import ENTITY_TYPE, Entity, EntityData
 from ..entity.output_types import OutputType
 from ..entity.tileables import Tileable
@@ -90,7 +88,7 @@ class SchedulingHint(Serializable):
     priority = Int32Field("priority", default=None)
 
     @classproperty
-    @
+    @lru_cache(1)
     def all_hint_names(cls):
         return list(cls._FIELDS)
 
@@ -341,7 +339,7 @@ class Operator(Base, OperatorLogicKeyGeneratorMixin, metaclass=OperatorMetaclass
             raise ValueError("Outputs' size exceeds limitation")
 
     @property
-    def outputs(self) -> List[
+    def outputs(self) -> List[Tileable]:
         outputs = self._outputs
         if outputs:
             return [ref() for ref in outputs]
maxframe/core/operator/objects.py CHANGED

@@ -17,7 +17,6 @@ from ..entity import OutputType, register_fetch_class
 from .base import Operator
 from .core import TileableOperatorMixin
 from .fetch import Fetch, FetchMixin
-from .fuse import Fuse, FuseChunkMixin
 
 
 class ObjectOperator(Operator):
@@ -28,14 +27,6 @@ class ObjectOperatorMixin(TileableOperatorMixin):
     _output_type_ = OutputType.object
 
 
-class ObjectFuseChunkMixin(FuseChunkMixin, ObjectOperatorMixin):
-    __slots__ = ()
-
-
-class ObjectFuseChunk(ObjectFuseChunkMixin, Fuse):
-    pass
-
-
 class ObjectFetch(FetchMixin, ObjectOperatorMixin, Fetch):
     _output_type_ = OutputType.object
 
maxframe/core/operator/utils.py ADDED

@@ -0,0 +1,55 @@
+# Copyright 1999-2024 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...typing_ import EntityType, TileableType
+from ..entity import TILEABLE_TYPE
+
+
+def build_fetch_tileable(tileable: TileableType) -> TileableType:
+    if tileable.is_coarse():
+        chunks = None
+    else:
+        chunks = []
+        for c in tileable.chunks:
+            fetch_chunk = build_fetch(c, index=c.index)
+            chunks.append(fetch_chunk)
+
+    tileable_op = tileable.op
+    params = tileable.params.copy()
+
+    new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
+    return new_op.new_tileables(
+        None,
+        chunks=chunks,
+        nsplits=tileable.nsplits,
+        _key=tileable.key,
+        _id=tileable.id,
+        **params,
+    )[0]
+
+
+_type_to_builder = [
+    (TILEABLE_TYPE, build_fetch_tileable),
+]
+
+
+def build_fetch(entity: EntityType, **kw) -> EntityType:
+    for entity_types, func in _type_to_builder:
+        if isinstance(entity, entity_types):
+            return func(entity, **kw)
+    raise TypeError(f"Type {type(entity)} not supported")
+
+
+def add_fetch_builder(entity_type, builder_func):
+    _type_to_builder.append((entity_type, builder_func))
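The new maxframe/core/operator/utils.py builds fetch counterparts through a small registry: build_fetch walks _type_to_builder and dispatches on isinstance, and add_fetch_builder (now exported from maxframe.core.operator) lets other modules register builders for additional entity types. A minimal sketch of the registry mechanics; DummyEntity and fetch_dummy are hypothetical stand-ins for illustration, not part of the package:

from maxframe.core.operator import add_fetch_builder, build_fetch

class DummyEntity:  # hypothetical entity type, for illustration only
    pass

def fetch_dummy(entity, **kw):
    # A real builder would create a Fetch operator and a new tileable;
    # returning the entity is enough to show the dispatch path.
    return entity

add_fetch_builder(DummyEntity, fetch_dummy)
# build_fetch falls through the built-in TILEABLE_TYPE entry and hits ours.
assert build_fetch(DummyEntity()) is not None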
maxframe/dataframe/__init__.py CHANGED

maxframe/dataframe/arithmetic/around.py CHANGED

@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
     return op(df)
 
 
+# FIXME Series input of decimals not supported yet
 around.__frame_doc__ = """
 Round a DataFrame to a variable number of decimal places.
 
 Parameters
 ----------
-decimals : int, dict
+decimals : int, dict
     Number of decimal places to round each column to. If an int is
     given, round each column to the same number of places.
     Otherwise dict and Series round to variable numbers of places.
     Column names should be in the keys if `decimals` is a
-    dict-like
-
-
-    ignored.
+    dict-like. Any columns not included in `decimals` will be left
+    as is. Elements of `decimals` which are not columns of the
+    input will be ignored.
 *args
     Additional keywords have no effect but might be accepted for
     compatibility with numpy.
@@ -107,18 +107,6 @@ places as value
 1   0.0   1.0
 2   0.7   0.0
 3   0.2   0.0
-
-Using a Series, the number of places for specific columns can be
-specified with the column names as index and the number of
-decimal places as value
-
->>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
->>> df.round(decimals).execute()
-   dogs  cats
-0   0.2   0.0
-1   0.0   1.0
-2   0.7   0.0
-3   0.2   0.0
 """
 around.__series_doc__ = """
 Round each value in a Series to the given number of decimals.
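The round docstring drops its Series-valued decimals example and the new FIXME records that Series input for decimals is not supported yet; the int and dict forms remain documented. A doctest-style sketch of the dict form, reusing the dogs/cats sample frame from the retained docstring (the constructor data is assumed here from that example):

>>> import maxframe.dataframe as md
>>> df = md.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)],
...                   columns=['dogs', 'cats'])
>>> df.round({'dogs': 1, 'cats': 0}).execute()
   dogs  cats
0   0.2   0.0
1   0.0   1.0
2   0.7   0.0
3   0.2   0.0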
maxframe/dataframe/arithmetic/core.py CHANGED

@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         raise NotImplementedError
 
     @classmethod
-    def _calc_properties(cls, x1, x2=None, axis="columns"):
+    def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
         if isinstance(x1, DATAFRAME_TYPE) and (
             x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
         ):
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
             index = copy.copy(x1.index_value)
             index_shape = x1.shape[0]
         else:
-            index = infer_index_value(
+            index = infer_index_value(
+                x1.index_value, x2.index_value, level=level
+            )
             if index.key == x1.index_value.key == x2.index_value.key and (
                 not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
             ):
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                 column_shape = len(dtypes)
             else:  # pragma: no cover
                 dtypes = x1.dtypes  # FIXME
-                columns = infer_index_value(
+                columns = infer_index_value(
+                    x1.columns_value, x2.index_value, level=level
+                )
                 column_shape = np.nan
         else:
             assert axis == "index" or axis == 0
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                 ],
                 index=x1.dtypes.index,
             )
-            index = infer_index_value(
+            index = infer_index_value(
+                x1.index_value, x2.index_value, level=level
+            )
             index_shape = np.nan
         return {
             "shape": (index_shape, column_shape),
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
             index = copy.copy(x1.index_value)
             index_shape = x1.shape[0]
         else:
-            index = infer_index_value(
+            index = infer_index_value(
+                x1.index_value, x2.index_value, level=level
+            )
             if index.key == x1.index_value.key == x2.index_value.key and (
                 not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
             ):
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         self._check_inputs(x1, x2)
         if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
             df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
-            kw = self._calc_properties(df1, df2, axis=self.axis)
+            kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
             if not pd.api.types.is_scalar(df2):
                 return self.new_dataframe([x1, x2], **kw)
             else:
                 return self.new_dataframe([df1], **kw)
         if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
             s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
-            kw = self._calc_properties(s1, s2)
+            kw = self._calc_properties(s1, s2, level=self.level)
             if not pd.api.types.is_scalar(s2):
                 return self.new_series([x1, x2], **kw)
             else:
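_calc_properties now accepts a level argument, forwards it to infer_index_value, and the binary operators pass self.level through, so DataFrame/Series operations can align on a single level of a MultiIndex; the new test_dataframe_and_series_with_multiindex case later in this diff exercises exactly this pattern. A doctest-style sketch, assuming the md.DataFrame/md.Series constructors accept pandas objects as in the docstring examples above; the sample data is made up for illustration:

>>> import pandas as pd
>>> import maxframe.dataframe as md
>>> raw = pd.DataFrame(
...     {'value': range(6)},
...     index=pd.MultiIndex.from_product([['A', 'B'], [0, 1, 2]]))
>>> df = md.DataFrame(raw)
>>> s = md.Series(pd.Series([10, 20, 30], index=[0, 1, 2]))
>>> df.add(s, level=1, axis=0).execute()   # align s against the second index level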
maxframe/dataframe/arithmetic/docstring.py CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
 _flex_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Equivalent to ``{equiv}``, but with support to substitute a fill_value
@@ -127,44 +128,15 @@ circle 0
 triangle        3
 rectangle       4
 
->>> (df * other).execute()
-           angles  degrees
-circle          0      NaN
-triangle        9      NaN
-rectangle      16      NaN
-
 >>> df.mul(other, fill_value=0).execute()
            angles  degrees
 circle          0      0.0
 triangle        9      0.0
 rectangle      16      0.0
 
-Divide by a MultiIndex by level.
-
->>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
-...                              'degrees': [360, 180, 360, 360, 540, 720]}},
-...                             index=[['A', 'A', 'A', 'B', 'B', 'B'],
-...                                    ['circle', 'triangle', 'rectangle',
-...                                     'square', 'pentagon', 'hexagon']])
->>> df_multindex.execute()
-             angles  degrees
-A circle          0      360
-  triangle        3      180
-  rectangle       4      360
-B square          4      360
-  pentagon        5      540
-  hexagon         6      720
-
->>> df.div(df_multindex, level=1, fill_value=0).execute()
-             angles  degrees
-A circle        NaN      1.0
-  triangle      1.0      1.0
-  rectangle     1.0      1.0
-B square        0.0      0.0
-  pentagon      0.0      0.0
-  hexagon       0.0      0.0
 """
 
+# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
 _flex_doc_SERIES = """
 Return {desc} of series and other, element-wise (binary operator `{op_name}`).
 
@@ -213,6 +185,7 @@ e    NaN
 dtype: float64
 """
 
+# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
 _flex_comp_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
@@ -257,7 +230,8 @@ Mismatched indices will be unioned together.
 
 Examples
 --------
->>>
+>>> import maxframe.dataframe as md
+>>> df = md.DataFrame({{'cost': [250, 150, 100],
 ...                    'revenue': [100, 250, 300]}},
 ...                   index=['A', 'B', 'C'])
 >>> df.execute()
@@ -332,30 +306,6 @@ A False False
 B  False  False
 C  False   True
 D  False  False
-
-Compare to a MultiIndex by level.
-
->>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
-...                              'revenue': [100, 250, 300, 200, 175, 225]}},
-...                             index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
-...                                    ['A', 'B', 'C', 'A', 'B', 'C']])
->>> df_multindex.execute()
-      cost  revenue
-Q1 A   250      100
-   B   150      250
-   C   100      300
-Q2 A   150      200
-   B   300      175
-   C   220      225
-
->>> df.le(df_multindex, level=1).execute()
-      cost  revenue
-Q1 A  True     True
-   B  True     True
-   C  True     True
-Q2 A  False    True
-   B  True    False
-   C  True    False
 """
 
 
maxframe/dataframe/arithmetic/tests/test_arithmetic.py CHANGED

@@ -239,6 +239,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
     assert df2.columns_value.key != df1.columns_value.key
 
 
+@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
+def test_dataframe_and_series_with_multiindex(func_name, func_opts):
+    data1 = pd.DataFrame(
+        np.random.rand(10, 10),
+        index=pd.MultiIndex.from_arrays(
+            [list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
+        ),
+        columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
+    )
+    data1 = to_boolean_if_needed(func_opts.func_name, data1)
+    df1 = from_pandas(data1, chunk_size=5)
+    s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
+
+    df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
+
+    # test df2's index and columns
+    assert df2.shape == (np.nan, df1.shape[1])
+    assert df2.index_value.key != df1.index_value.key
+    assert df2.index_value.names == df1.index_value.names
+    assert df2.columns_value.key == df1.columns_value.key
+
+
 @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
 def test_series_and_series_with_align_map(func_name, func_opts):
     data1 = pd.DataFrame(
maxframe/dataframe/core.py CHANGED

@@ -1086,11 +1086,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim
+        >>> s.ndim
         1
 
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim
+        >>> df.ndim
         2
         """
         return super().ndim
@@ -1520,7 +1520,7 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
         self._columns_value = parse_index(dtypes.index, store_data=True)
         self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
         new_shape = list(self._shape)
-        new_shape[
+        new_shape[-1] = len(dtypes)
         self._shape = tuple(new_shape)
 
     @property
@@ -1761,11 +1761,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim
+        >>> s.ndim
         1
 
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim
+        >>> df.ndim
         2
         """
         return super().ndim
maxframe/dataframe/datasource/date_range.py CHANGED

@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
 from pandas.tseries.frequencies import to_offset
 from pandas.tseries.offsets import Tick
 
-from ... import opcodes
+from ... import opcodes
 from ...core import OutputType
 from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
 from ...utils import no_default, pd_release_version
@@ -117,7 +117,7 @@ def generate_range_count(
 
 
 class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
-    _op_type_ =
+    _op_type_ = opcodes.DATE_RANGE
 
     start = AnyField("start")
     end = AnyField("end")
maxframe/dataframe/datasource/read_odps_query.py CHANGED

@@ -24,7 +24,7 @@ from odps.types import Column, OdpsSchema, validate_data_type
 from ... import opcodes
 from ...core import OutputType
 from ...core.graph import DAG
-from ...odpsio import odps_schema_to_pandas_dtypes
+from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
     AnyField,
     BoolField,
@@ -47,6 +47,7 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
     re.MULTILINE,
 )
 _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
+_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d+$")
 
 
 @dataclasses.dataclass
@@ -272,6 +273,11 @@ def read_odps_query(
         explain_str = list(inst.get_task_results().values())[0]
 
     odps_schema = _parse_explained_schema(explain_str)
+
+    for col in odps_schema.columns:
+        if _ANONYMOUS_COL_REGEX.match(col.name) and col.name not in query:
+            raise ValueError("Need to specify names for all columns in SELECT clause.")
+
     dtypes = odps_schema_to_pandas_dtypes(odps_schema)
 
     if not index_col:
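read_odps_query now validates the parsed schema: columns whose names look like ODPS anonymous columns (_c0, _c1, ...) and do not appear in the query text are rejected, so every expression in the SELECT list must carry an alias. A hedged sketch of the behaviour; my_table and its columns are made up, and a configured ODPS account is assumed as in the tests:

from maxframe.dataframe import read_odps_query

# Raises ValueError("Need to specify names for all columns in SELECT clause."):
# the second SELECT item has no alias and gets an anonymous _c1 column name.
read_odps_query("SELECT col1, col2 + col3 FROM my_table")

# Giving the expression an alias satisfies the new check.
df = read_odps_query("SELECT col1, col2 + col3 AS col_sum FROM my_table")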
maxframe/dataframe/datasource/read_odps_table.py CHANGED

@@ -23,7 +23,7 @@ from odps.utils import to_timestamp
 
 from ... import opcodes
 from ...core import OutputType
-from ...odpsio import odps_schema_to_pandas_dtypes
+from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
     AnyField,
     BoolField,
@@ -119,9 +119,10 @@ class DataFrameReadODPSTable(
         return self.new_tileable(
             [],
             None,
-            shape=shape,
+            shape=shape[:1],
             name=getattr(index_value, "name", None),
             names=getattr(index_value, "names", None),
+            dtype=self.index_dtypes.iloc[0],
             index_value=index_value,
             chunk_bytes=chunk_bytes,
             chunk_size=chunk_size,
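The changed new_tileable call, used when a table is read as an Index (as the new datasource test below does), now passes a one-dimensional shape and the dtype of the first index column. A hedged sketch of the call; my_table is hypothetical and a configured ODPS account is assumed:

>>> from maxframe.core import OutputType
>>> from maxframe.dataframe import read_odps_table
>>> idx = read_odps_table(
...     "my_table", columns=[], index_col=["col1", "col2"],
...     output_type=OutputType.index)
>>> idx.names
['col1', 'col2']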
maxframe/dataframe/datasource/tests/test_datasource.py CHANGED

@@ -21,6 +21,7 @@ import pytest
 from odps import ODPS
 
 from .... import tensor as mt
+from ....core import OutputType
 from ....tests.utils import tn
 from ....utils import lazy_import
 from ... import read_odps_query, read_odps_table
@@ -295,6 +296,15 @@ def test_from_odps_table():
         ),
     )
 
+    out_idx = read_odps_table(
+        test_table,
+        columns=[],
+        index_col=["col1", "col2"],
+        output_type=OutputType.index,
+    )
+    assert out_idx.names == ["col1", "col2"]
+    assert out_idx.shape == (np.nan,)
+
     test_table.drop()
     test_parted_table.drop()
 
@@ -319,6 +329,10 @@ def test_from_odps_query():
         read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
     assert "instant query" in err_info.value.args[0]
 
+    with pytest.raises(ValueError) as err_info:
+        read_odps_query(f"SELECT col1, col2 + col3 FROM {table1_name}")
+    assert "names" in err_info.value.args[0]
+
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
     df = read_odps_query(query1)
     assert df.op.query == query1