maxframe 1.0.0rc1-cp37-cp37m-win_amd64.whl → 1.0.0rc3-cp37-cp37m-win_amd64.whl

This diff compares publicly available package versions as released to their public registries and is provided for informational purposes only.

Potentially problematic release: this version of maxframe might be problematic.
Files changed (138)
  1. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +3 -6
  3. maxframe/config/config.py +49 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +15 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +46 -3
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/__init__.py +1 -1
  24. maxframe/dataframe/arithmetic/around.py +5 -17
  25. maxframe/dataframe/arithmetic/core.py +15 -7
  26. maxframe/dataframe/arithmetic/docstring.py +5 -55
  27. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  28. maxframe/dataframe/core.py +5 -5
  29. maxframe/dataframe/datasource/date_range.py +2 -2
  30. maxframe/dataframe/datasource/read_odps_query.py +7 -1
  31. maxframe/dataframe/datasource/read_odps_table.py +3 -2
  32. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  33. maxframe/dataframe/datastore/to_odps.py +1 -1
  34. maxframe/dataframe/groupby/cum.py +0 -1
  35. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/rename.py +3 -37
  38. maxframe/dataframe/indexing/sample.py +0 -1
  39. maxframe/dataframe/indexing/set_index.py +68 -1
  40. maxframe/dataframe/merge/merge.py +236 -2
  41. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  42. maxframe/dataframe/misc/apply.py +3 -10
  43. maxframe/dataframe/misc/case_when.py +1 -1
  44. maxframe/dataframe/misc/describe.py +2 -2
  45. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  46. maxframe/dataframe/misc/eval.py +4 -0
  47. maxframe/dataframe/misc/pct_change.py +1 -83
  48. maxframe/dataframe/misc/transform.py +1 -30
  49. maxframe/dataframe/misc/value_counts.py +4 -17
  50. maxframe/dataframe/missing/dropna.py +1 -1
  51. maxframe/dataframe/missing/fillna.py +5 -5
  52. maxframe/dataframe/operators.py +1 -17
  53. maxframe/dataframe/reduction/core.py +2 -2
  54. maxframe/dataframe/sort/sort_values.py +1 -11
  55. maxframe/dataframe/statistics/quantile.py +5 -17
  56. maxframe/dataframe/utils.py +4 -7
  57. maxframe/io/objects/__init__.py +24 -0
  58. maxframe/io/objects/core.py +140 -0
  59. maxframe/io/objects/tensor.py +76 -0
  60. maxframe/io/objects/tests/__init__.py +13 -0
  61. maxframe/io/objects/tests/test_object_io.py +97 -0
  62. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  63. maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
  64. maxframe/{odpsio → io/odpsio}/schema.py +15 -12
  65. maxframe/io/odpsio/tableio.py +702 -0
  66. maxframe/io/odpsio/tests/__init__.py +13 -0
  67. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
  68. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  69. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  70. maxframe/io/odpsio/volumeio.py +57 -0
  71. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  72. maxframe/learn/contrib/xgboost/core.py +87 -2
  73. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  74. maxframe/learn/contrib/xgboost/predict.py +21 -7
  75. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  76. maxframe/learn/contrib/xgboost/train.py +27 -17
  77. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  78. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  79. maxframe/protocol.py +41 -17
  80. maxframe/remote/core.py +4 -8
  81. maxframe/serialization/__init__.py +1 -0
  82. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  83. maxframe/serialization/serializables/core.py +48 -9
  84. maxframe/tensor/__init__.py +69 -2
  85. maxframe/tensor/arithmetic/isclose.py +1 -0
  86. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  87. maxframe/tensor/core.py +5 -136
  88. maxframe/tensor/datasource/array.py +3 -0
  89. maxframe/tensor/datasource/full.py +1 -1
  90. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  91. maxframe/tensor/indexing/flatnonzero.py +1 -1
  92. maxframe/tensor/merge/__init__.py +2 -0
  93. maxframe/tensor/merge/concatenate.py +98 -0
  94. maxframe/tensor/merge/tests/test_merge.py +30 -1
  95. maxframe/tensor/merge/vstack.py +70 -0
  96. maxframe/tensor/{base → misc}/__init__.py +2 -0
  97. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  98. maxframe/tensor/misc/atleast_2d.py +70 -0
  99. maxframe/tensor/misc/atleast_3d.py +85 -0
  100. maxframe/tensor/misc/tests/__init__.py +13 -0
  101. maxframe/tensor/{base → misc}/transpose.py +22 -18
  102. maxframe/tensor/{base → misc}/unique.py +2 -2
  103. maxframe/tensor/operators.py +1 -7
  104. maxframe/tensor/random/core.py +1 -1
  105. maxframe/tensor/reduction/count_nonzero.py +1 -0
  106. maxframe/tensor/reduction/mean.py +1 -0
  107. maxframe/tensor/reduction/nanmean.py +1 -0
  108. maxframe/tensor/reduction/nanvar.py +2 -0
  109. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  110. maxframe/tensor/reduction/var.py +2 -0
  111. maxframe/tensor/statistics/quantile.py +2 -2
  112. maxframe/tensor/utils.py +2 -22
  113. maxframe/tests/utils.py +11 -2
  114. maxframe/typing_.py +4 -1
  115. maxframe/udf.py +8 -9
  116. maxframe/utils.py +32 -70
  117. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +2 -2
  118. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
  119. maxframe_client/fetcher.py +60 -68
  120. maxframe_client/session/graph.py +8 -2
  121. maxframe_client/session/odps.py +58 -22
  122. maxframe_client/tests/test_fetcher.py +21 -3
  123. maxframe_client/tests/test_session.py +27 -4
  124. maxframe/core/entity/chunks.py +0 -68
  125. maxframe/core/entity/fuse.py +0 -73
  126. maxframe/core/graph/builder/chunk.py +0 -430
  127. maxframe/odpsio/tableio.py +0 -322
  128. maxframe/odpsio/volumeio.py +0 -95
  129. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  130. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  131. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  132. /maxframe/tensor/{base → misc}/astype.py +0 -0
  133. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  134. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  135. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  136. /maxframe/tensor/{base → misc}/where.py +0 -0
  137. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +0 -0
  138. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0

maxframe/dataframe/misc/apply.py

@@ -319,6 +319,7 @@ def df_apply(
  skip_infer=False,
  **kwds,
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/50
  """
  Apply a function along an axis of the DataFrame.

@@ -444,20 +445,12 @@ def df_apply(
  B 27
  dtype: int64

- >>> df.apply(np.sum, axis=1).execute()
+ >>> df.apply(lambda row: int(np.sum(row)), axis=1).execute()
  0 13
  1 13
  2 13
  dtype: int64

- Returning a list-like will result in a Series
-
- >>> df.apply(lambda x: [1, 2], axis=1).execute()
- 0 [1, 2]
- 1 [1, 2]
- 2 [1, 2]
- dtype: object
-
  Passing ``result_type='expand'`` will expand list-like results
  to columns of a Dataframe

@@ -471,7 +464,7 @@ def df_apply(
  ``result_type='expand'``. The resulting column names
  will be the Series index.

- >>> df.apply(lambda x: md.Series([1, 2], index=['foo', 'bar']), axis=1).execute()
+ >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1).execute()
  foo bar
  0 1 2
  1 1 2

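
The docstring now passes an explicit callable instead of bare `np.sum` and builds the expanded result with `pd.Series` rather than `md.Series`. A minimal sketch of the updated usage pattern, assuming a MaxFrame session already connected to a MaxCompute project; the example data is reconstructed from the doctest output above, not taken verbatim from the package:

    import numpy as np
    import pandas as pd
    import maxframe.dataframe as md

    # Example frame reconstructed from the doctest output (each row sums to 13).
    df = md.DataFrame([[4, 9]] * 3, columns=["A", "B"])

    # Per-row scalar result: wrap the reduction in a plain callable.
    row_sums = df.apply(lambda row: int(np.sum(row)), axis=1)

    # Series-returning UDF: the updated docstring builds it with pandas, not md.Series.
    expanded = df.apply(lambda x: pd.Series([1, 2], index=["foo", "bar"]), axis=1)

    # MaxFrame is lazy; .execute() submits the job and fetches the result.
    # row_sums.execute()
    # expanded.execute()
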
maxframe/dataframe/misc/case_when.py

@@ -99,7 +99,7 @@ def case_when(series, caselist):
  >>> b = md.Series([0, 3, 4, 5])

  >>> c.case_when(caselist=[(a.gt(0), a), # condition, replacement
- ... (b.gt(0), b)])
+ ... (b.gt(0), b)]).execute()
  0 6
  1 3
  2 1

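
The only change here is the trailing `.execute()`: MaxFrame objects are lazy, so the doctest chain now ends with an explicit execute call. A sketch of the pattern, assuming a live session; the values of `a` and `c` are taken from the corresponding pandas `case_when` example, not from this hunk:

    import maxframe.dataframe as md

    c = md.Series([6, 7, 8, 9], name="c")
    a = md.Series([0, 0, 1, 2])
    b = md.Series([0, 3, 4, 5])

    # The replacement comes from the first caselist entry whose condition holds.
    result = c.case_when(caselist=[(a.gt(0), a),   # condition, replacement
                                   (b.gt(0), b)])
    # result.execute()   # deferred execution, as in the updated docstring
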
maxframe/dataframe/misc/describe.py

@@ -15,7 +15,7 @@
  import numpy as np
  import pandas as pd

- from ... import opcodes as OperandDef
+ from ... import opcodes
  from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
  from ..core import SERIES_TYPE
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -23,7 +23,7 @@ from ..utils import build_empty_df, parse_index


  class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
- _op_type_ = OperandDef.DESCRIBE
+ _op_type_ = opcodes.DESCRIBE

  input = KeyField("input", default=None)
  percentiles = ListField("percentiles", FieldTypes.float64, default=None)

maxframe/dataframe/misc/drop_duplicates.py

@@ -37,16 +37,15 @@ class DataFrameDropDuplicates(DuplicateOperand):
  shape += (3,)
  return shape

- @classmethod
- def _gen_tileable_params(cls, op: "DataFrameDropDuplicates", input_params):
+ def _gen_tileable_params(self, op: "DataFrameDropDuplicates", input_params):
  params = input_params.copy()
- if op.ignore_index:
+ if op.ignore_index and self._output_types[0] != OutputType.index:
  params["index_value"] = parse_index(pd.RangeIndex(-1))
  else:
  params["index_value"] = gen_unknown_index_value(
  input_params["index_value"], op.keep, op.subset, type(op).__name__
  )
- params["shape"] = cls._get_shape(input_params["shape"], op)
+ params["shape"] = self._get_shape(input_params["shape"], op)
  return params

  def __call__(self, inp, inplace=False):
@@ -105,6 +104,7 @@ def df_drop_duplicates(
  def series_drop_duplicates(
  series, keep="first", inplace=False, ignore_index=False, method="auto"
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/12
  """
  Return Series with duplicate values removed.

@@ -148,27 +148,6 @@ def series_drop_duplicates(
  5 hippo
  Name: animal, dtype: object

- With the 'keep' parameter, the selection behaviour of duplicated values
- can be changed. The value 'first' keeps the first occurrence for each
- set of duplicated entries. The default value of keep is 'first'.
-
- >>> s.drop_duplicates().execute()
- 0 lame
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
-
- The value 'last' for parameter 'keep' keeps the last occurrence for
- each set of duplicated entries.
-
- >>> s.drop_duplicates(keep='last').execute()
- 1 cow
- 3 beetle
- 4 lame
- 5 hippo
- Name: animal, dtype: object
-
  The value ``False`` for parameter 'keep' discards all sets of
  duplicated entries. Setting the value of 'inplace' to ``True`` performs
  the operation inplace and returns ``None``.

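
The series used by the retained examples can be rebuilt from the deleted doctest output above; a sketch of the `keep=False` case that the docstring still documents, assuming a working MaxFrame session:

    import maxframe.dataframe as md

    # Values inferred from the doctest output shown in the hunk above.
    s = md.Series(["lame", "cow", "lame", "beetle", "lame", "hippo"], name="animal")

    # keep=False discards every set of duplicated entries ("lame" disappears).
    deduped = s.drop_duplicates(keep=False)
    # deduped.execute()
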
maxframe/dataframe/misc/eval.py

@@ -120,6 +120,10 @@ class CollectionVisitor(ast.NodeVisitor):
  if obj_name in self.env:
  self.referenced_vars.add(obj_name)
  return self.env[obj_name]
+ try:
+ return self.target[obj_name]
+ except KeyError:
+ pass
  raise KeyError(f"name {obj_name} is not defined")

  def visit(self, node):

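
The four added lines make `CollectionVisitor` fall back to looking a name up on `self.target` before raising. Assuming `target` is the frame being evaluated (an assumption, not stated in the hunk), this is what lets bare column names resolve in `eval`/`query` expressions; a hedged sketch:

    import maxframe.dataframe as md

    df = md.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # "a" and "b" are not local variables; with the added fallback they are
    # resolved via self.target[obj_name] instead of raising KeyError.
    expr = df.eval("a + b")
    # expr.execute()
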
maxframe/dataframe/misc/pct_change.py

@@ -18,6 +18,7 @@ from ..utils import validate_axis
  def pct_change(
  df_or_series, periods=1, fill_method="pad", limit=None, freq=None, **kwargs
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/32
  """
  Percentage change between the current and a prior element.

@@ -50,89 +51,6 @@ def pct_change(
  DataFrame.diff : Compute the difference of two elements in a DataFrame.
  Series.shift : Shift the index by some number of periods.
  DataFrame.shift : Shift the index by some number of periods.
-
- Examples
- --------
- **Series**
-
- >>> import maxframe.dataframe as md
-
- >>> s = md.Series([90, 91, 85])
- >>> s.execute()
- 0 90
- 1 91
- 2 85
- dtype: int64
-
- >>> s.pct_change().execute()
- 0 NaN
- 1 0.011111
- 2 -0.065934
- dtype: float64
-
- >>> s.pct_change(periods=2).execute()
- 0 NaN
- 1 NaN
- 2 -0.055556
- dtype: float64
-
- See the percentage change in a Series where filling NAs with last
- valid observation forward to next valid.
-
- >>> s = md.Series([90, 91, None, 85])
- >>> s.execute()
- 0 90.0
- 1 91.0
- 2 NaN
- 3 85.0
- dtype: float64
-
- >>> s.pct_change(fill_method='ffill').execute()
- 0 NaN
- 1 0.011111
- 2 0.000000
- 3 -0.065934
- dtype: float64
-
- **DataFrame**
-
- Percentage change in French franc, Deutsche Mark, and Italian lira from
- 1980-01-01 to 1980-03-01.
-
- >>> df = md.DataFrame({
- ... 'FR': [4.0405, 4.0963, 4.3149],
- ... 'GR': [1.7246, 1.7482, 1.8519],
- ... 'IT': [804.74, 810.01, 860.13]},
- ... index=['1980-01-01', '1980-02-01', '1980-03-01'])
- >>> df.execute()
- FR GR IT
- 1980-01-01 4.0405 1.7246 804.74
- 1980-02-01 4.0963 1.7482 810.01
- 1980-03-01 4.3149 1.8519 860.13
-
- >>> df.pct_change().execute()
- FR GR IT
- 1980-01-01 NaN NaN NaN
- 1980-02-01 0.013810 0.013684 0.006549
- 1980-03-01 0.053365 0.059318 0.061876
-
- Percentage of change in GOOG and APPL stock volume. Shows computing
- the percentage change between columns.
-
- >>> df = md.DataFrame({
- ... '2016': [1769950, 30586265],
- ... '2015': [1500923, 40912316],
- ... '2014': [1371819, 41403351]},
- ... index=['GOOG', 'APPL'])
- >>> df.execute()
- 2016 2015 2014
- GOOG 1769950 1500923 1371819
- APPL 30586265 40912316 41403351
-
- >>> df.pct_change(axis='columns').execute()
- 2016 2015 2014
- GOOG NaN -0.151997 -0.086016
- APPL NaN 0.337604 0.012002
  """

  axis = validate_axis(kwargs.pop("axis", 0))

maxframe/dataframe/misc/transform.py

@@ -228,21 +228,6 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
  0 1 2
  1 2 3
  2 3 4
-
- Even though the resulting DataFrame must have the same length as the
- input DataFrame, it is possible to provide several input functions:
-
- >>> s = md.Series(range(3))
- >>> s.execute()
- 0 0
- 1 1
- 2 2
- dtype: int64
- >>> s.transform([mt.sqrt, mt.exp]).execute()
- sqrt exp
- 0 0.000000 1.000000
- 1 1.000000 2.718282
- 2 1.414214 7.389056
  """
  op = TransformOperator(
  func=func,
@@ -265,6 +250,7 @@ def series_transform(
  dtype=None,
  **kwargs
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/10
  """
  Call ``func`` on self producing a Series with transformed values.

@@ -332,21 +318,6 @@ def series_transform(
  0 1 2
  1 2 3
  2 3 4
-
- Even though the resulting Series must have the same length as the
- input Series, it is possible to provide several input functions:
-
- >>> s = md.Series(range(3))
- >>> s.execute()
- 0 0
- 1 1
- 2 2
- dtype: int64
- >>> s.transform([mt.sqrt, mt.exp]).execute()
- sqrt exp
- 0 0.000000 1.000000
- 1 1.000000 2.718282
- 2 1.414214 7.389056
  """
  op = TransformOperator(
  func=func,

maxframe/dataframe/misc/value_counts.py

@@ -85,6 +85,7 @@ def value_counts(
  dropna=True,
  method="auto",
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/33
  """
  Return a Series containing counts of unique values.

@@ -125,9 +126,8 @@ def value_counts(
  Examples
  --------
  >>> import maxframe.dataframe as md
- >>> import maxframe.tensor as mt
-
- >>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
+ >>> import numpy as np
+ >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
  >>> s.value_counts().execute()
  3.0 2
  4.0 1
@@ -138,7 +138,7 @@ def value_counts(
  With `normalize` set to `True`, returns the relative frequency by
  dividing all values by the sum of values.

- >>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
+ >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
  >>> s.value_counts(normalize=True).execute()
  3.0 0.4
  4.0 0.2
@@ -146,19 +146,6 @@ def value_counts(
  1.0 0.2
  dtype: float64

- **bins**
-
- Bins can be useful for going from a continuous variable to a
- categorical variable; instead of counting unique
- apparitions of values, divide the index in the specified
- number of half-open bins.
-
- >>> s.value_counts(bins=3).execute()
- (2.0, 3.0] 2
- (0.996, 2.0] 2
- (3.0, 4.0] 1
- dtype: int64
-
  **dropna**

  With `dropna` set to `False` we can also see NaN index values.

maxframe/dataframe/missing/dropna.py

@@ -234,7 +234,7 @@ def series_dropna(series, axis=0, inplace=False, how=None):
  Empty strings are not considered NA values. ``None`` is considered an
  NA value.

- >>> ser = md.Series([np.NaN, 2, md.NaT, '', None, 'I stay'])
+ >>> ser = md.Series([np.NaN, '2', md.NaT, '', None, 'I stay'])
  >>> ser.execute()
  0 NaN
  1 2

maxframe/dataframe/missing/fillna.py

@@ -132,11 +132,11 @@ def fillna(
  --------
  >>> import maxframe.tensor as mt
  >>> import maxframe.dataframe as md
- >>> df = md.DataFrame([[mt.nan, 2, mt.nan, 0],
- ... [3, 4, mt.nan, 1],
- ... [mt.nan, mt.nan, mt.nan, 5],
- ... [mt.nan, 3, mt.nan, 4]],
- ... columns=list('ABCD'))
+ >>> df = md.DataFrame([[np.nan, 2, np.nan, 0],
+ [3, 4, np.nan, 1],
+ [np.nan, np.nan, np.nan, 5],
+ [np.nan, 3, np.nan, 4]],
+ columns=list('ABCD'))
  >>> df.execute()
  A B C D
  0 NaN 2.0 NaN 0

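
As in the value_counts change above, the fillna docstring now builds its example data with `np.nan` (a plain NumPy float) instead of `mt.nan`. A minimal sketch of the updated construction, assuming a working MaxFrame session; the `fillna(0)` call is the standard use of the documented function, not part of this hunk:

    import numpy as np
    import maxframe.dataframe as md

    # Example frame taken from the updated docstring above.
    df = md.DataFrame([[np.nan, 2, np.nan, 0],
                       [3, 4, np.nan, 1],
                       [np.nan, np.nan, np.nan, 5],
                       [np.nan, 3, np.nan, 4]],
                      columns=list("ABCD"))

    filled = df.fillna(0)   # replace every NaN with 0
    # filled.execute()
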
maxframe/dataframe/operators.py

@@ -16,13 +16,7 @@ import numpy as np
  import pandas as pd

  from ..core import ENTITY_TYPE, OutputType
- from ..core.operator import (
- Fuse,
- FuseChunkMixin,
- Operator,
- ShuffleProxy,
- TileableOperatorMixin,
- )
+ from ..core.operator import Operator, ShuffleProxy, TileableOperatorMixin
  from ..tensor.core import TENSOR_TYPE
  from ..tensor.datasource import tensor as astensor
  from .core import DATAFRAME_TYPE, SERIES_TYPE
@@ -261,13 +255,3 @@ DataFrameOperator = Operator
  class DataFrameShuffleProxy(ShuffleProxy, DataFrameOperatorMixin):
  def __init__(self, sparse=None, output_types=None, **kwargs):
  super().__init__(sparse=sparse, _output_types=output_types, **kwargs)
-
-
- class DataFrameFuseChunkMixin(FuseChunkMixin, DataFrameOperatorMixin):
- __slots__ = ()
-
-
- class DataFrameFuseChunk(Fuse, DataFrameFuseChunkMixin):
- @property
- def output_types(self):
- return self.outputs[-1].chunk.op.output_types

maxframe/dataframe/reduction/core.py

@@ -552,7 +552,7 @@ class ReductionCompiler:
  @enter_mode(build=True)
  def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
  from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
- from ...tensor.base import TensorWhere
+ from ...tensor.misc import TensorWhere
  from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
  from ..datasource.dataframe import DataFrameDataSource
  from ..datasource.series import SeriesDataSource
@@ -679,8 +679,8 @@ class ReductionCompiler:
  ]
  """
  from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
- from ...tensor.base import TensorWhere
  from ...tensor.datasource import Scalar
+ from ...tensor.misc import TensorWhere
  from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
  from ..datasource.dataframe import DataFrameDataSource
  from ..datasource.series import SeriesDataSource

maxframe/dataframe/sort/sort_values.py

@@ -67,6 +67,7 @@ def dataframe_sort_values(
  parallel_kind="PSRS",
  psrs_kinds=None,
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/15
  """
  Sort by the values along either axis.

@@ -152,17 +153,6 @@ def dataframe_sort_values(
  0 A 2 0
  1 A 1 1
  3 NaN 8 4
-
- Putting NAs first
-
- >>> df.sort_values(by='col1', ascending=False, na_position='first').execute()
- col1 col2 col3
- 3 NaN 8 4
- 4 D 7 2
- 5 C 4 3
- 2 B 9 9
- 0 A 2 0
- 1 A 1 1
  """

  if na_position not in ["last", "first"]: # pragma: no cover

maxframe/dataframe/statistics/quantile.py

@@ -14,8 +14,9 @@

  import numpy as np
  import pandas as pd
+ from pandas.core.dtypes.cast import find_common_type

- from ... import opcodes as OperandDef
+ from ... import opcodes
  from ...core import ENTITY_TYPE
  from ...serialization.serializables import (
  AnyField,
@@ -32,11 +33,11 @@ from ...tensor.datasource import tensor as astensor
  from ...tensor.statistics.quantile import quantile as tensor_quantile
  from ..core import DATAFRAME_TYPE
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
- from ..utils import build_empty_df, find_common_type, parse_index, validate_axis
+ from ..utils import build_empty_df, parse_index, validate_axis


  class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
- _op_type_ = OperandDef.QUANTILE
+ _op_type_ = opcodes.QUANTILE

  input = KeyField("input", default=None)
  q = AnyField("q", default=None)
@@ -259,6 +260,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):


  def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
+ # FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
  """
  Return values at the given quantile over requested axis.

@@ -309,20 +311,6 @@ def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="line
  a b
  0.1 1.3 3.7
  0.5 2.5 55.0
-
- Specifying `numeric_only=False` will also compute the quantile of
- datetime and timedelta data.
-
- >>> df = md.DataFrame({'A': [1, 2],
- ... 'B': [md.Timestamp('2010'),
- ... md.Timestamp('2011')],
- ... 'C': [md.Timedelta('1 days'),
- ... md.Timedelta('2 days')]})
- >>> df.quantile(0.5, numeric_only=False).execute()
- A 1.5
- B 2010-07-02 12:00:00
- C 1 days 12:00:00
- Name: 0.5, dtype: object
  """
  if isinstance(q, ENTITY_TYPE):
  q = astensor(q)

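
Given the new FIXME about timedeltas, the remaining docstring sticks to numeric columns. A sketch assuming a live session; the input data is reconstructed from the retained doctest output (0.1 -> 1.3/3.7, 0.5 -> 2.5/55.0) and is not part of this hunk:

    import numpy as np
    import maxframe.dataframe as md

    # Data reconstructed from the kept doctest output above.
    df = md.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
                      columns=["a", "b"])

    q = df.quantile([0.1, 0.5])   # numeric_only defaults to True per the signature above
    # q.execute()
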
maxframe/dataframe/utils.py

@@ -26,7 +26,6 @@ import numpy as np
  import pandas as pd
  from pandas.api.extensions import ExtensionDtype
  from pandas.api.types import is_string_dtype
- from pandas.core.dtypes.cast import find_common_type
  from pandas.core.dtypes.inference import is_dict_like, is_list_like

  from ..core import Entity, ExecutableTuple
@@ -477,11 +476,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
  else:
  fill_values = fill_value

- from .core import SERIES_TYPE
+ from .core import INDEX_TYPE, SERIES_TYPE

  dtypes = (
  pd.Series([df_obj.dtype], index=[df_obj.name])
- if isinstance(df_obj, SERIES_TYPE)
+ if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
  else df_obj.dtypes
  )
  for size, fill_value in zip(sizes, fill_values):
@@ -593,7 +592,7 @@ def build_series(
  return ret_series


- def infer_index_value(left_index_value, right_index_value):
+ def infer_index_value(left_index_value, right_index_value, level=None):
  from .core import IndexValue

  if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
@@ -616,9 +615,7 @@ def infer_index_value(left_index_value, right_index_value):

  left_index = left_index_value.to_pandas()
  right_index = right_index_value.to_pandas()
- out_index = pd.Index(
- [], dtype=find_common_type([left_index.dtype, right_index.dtype])
- )
+ out_index = left_index.join(right_index, level=level)[:0]
  return parse_index(out_index, left_index_value, right_index_value)

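
The rewritten `infer_index_value` no longer computes a common dtype by hand: it joins the two pandas indexes and slices the result to length zero, which yields an empty index whose dtype (and, with `level=`, structure) comes from the join itself. A pandas-only sketch of the trick, with example values chosen here for illustration:

    import pandas as pd

    left = pd.Index([1, 2, 3])     # int64
    right = pd.Index([2.5, 3.5])   # float64

    # Join, then keep zero rows: an empty Index carrying the joined dtype.
    out = left.join(right)[:0]
    print(out.dtype)               # float64 on recent pandas

    # Old approach, for comparison:
    # pd.Index([], dtype=find_common_type([left.dtype, right.dtype]))
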
maxframe/io/objects/__init__.py (new file)

@@ -0,0 +1,24 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .core import (
+ AbstractObjectIOHandler,
+ get_object_io_handler,
+ register_object_io_handler,
+ )
+
+ # isort: off
+ from . import tensor
+
+ del tensor

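
The new package exposes a registry of object IO handlers and registers the tensor handler as a side effect of `from . import tensor` (hence the `del tensor` afterwards). A generic sketch of the register/lookup idiom those names suggest; this is not maxframe's actual implementation, and the signatures below are assumptions:

    # Generic illustration of a type-keyed handler registry.
    _handlers = {}

    def register_object_io_handler(obj_type):
        def wrapper(handler_cls):
            _handlers[obj_type] = handler_cls
            return handler_cls
        return wrapper

    def get_object_io_handler(obj_type):
        # Walk the MRO so subclasses reuse a handler registered for a base type.
        for klass in obj_type.__mro__:
            if klass in _handlers:
                return _handlers[klass]
        raise KeyError(f"no IO handler registered for {obj_type!r}")

    # Usage sketch (hypothetical names):
    # @register_object_io_handler(SomeTileableType)
    # class SomeTileableIOHandler(AbstractObjectIOHandler): ...
    # handler = get_object_io_handler(SomeTileableType)()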