maxframe 0.1.0b5__cp37-cp37m-win32.whl → 1.0.0rc2__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of maxframe might be problematic.
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/codegen.py +6 -2
- maxframe/config/config.py +38 -2
- maxframe/config/validators.py +1 -0
- maxframe/conftest.py +2 -0
- maxframe/core/__init__.py +0 -3
- maxframe/core/entity/__init__.py +1 -8
- maxframe/core/entity/objects.py +3 -45
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +5 -55
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
- maxframe/dataframe/core.py +5 -5
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +6 -0
- maxframe/dataframe/datasource/read_odps_table.py +2 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
- maxframe/dataframe/datastore/tests/__init__.py +13 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +21 -0
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/rename.py +3 -37
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/merge/merge.py +236 -2
- maxframe/dataframe/merge/tests/test_merge.py +123 -0
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +4 -25
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +23 -0
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +5 -17
- maxframe/dataframe/utils.py +4 -7
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
- maxframe/learn/contrib/xgboost/predict.py +2 -2
- maxframe/learn/contrib/xgboost/train.py +2 -2
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/odpsio/__init__.py +1 -1
- maxframe/odpsio/arrow.py +8 -4
- maxframe/odpsio/schema.py +10 -7
- maxframe/odpsio/tableio.py +388 -14
- maxframe/odpsio/tests/test_schema.py +16 -15
- maxframe/odpsio/tests/test_tableio.py +48 -21
- maxframe/protocol.py +148 -12
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +54 -25
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +7 -2
- maxframe/serialization/serializables/core.py +158 -12
- maxframe/serialization/serializables/tests/test_serializable.py +46 -4
- maxframe/tensor/__init__.py +59 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/base/atleast_1d.py +1 -1
- maxframe/tensor/base/unique.py +3 -3
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +11 -2
- maxframe/utils.py +24 -13
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +75 -2
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +91 -89
- maxframe_client/__init__.py +0 -1
- maxframe_client/fetcher.py +38 -27
- maxframe_client/session/odps.py +50 -10
- maxframe_client/session/task.py +41 -20
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +49 -2
- maxframe_client/clients/spe.py +0 -104
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0
maxframe/dataframe/merge/tests/test_merge.py
CHANGED

@@ -19,6 +19,7 @@ import pytest
 from ...core import IndexValue
 from ...datasource.dataframe import from_pandas
 from .. import DataFrameMerge, concat
+from ..merge import DistributedMapJoinHint, MapJoinHint, SkewJoinHint


 def test_merge():
@@ -30,14 +31,39 @@ def test_merge():
     mdf1 = from_pandas(df1, chunk_size=2)
     mdf2 = from_pandas(df2, chunk_size=3)

+    mapjoin = MapJoinHint()
+    dist_mapjoin1 = DistributedMapJoinHint(shard_count=5)
+    skew_join1 = SkewJoinHint()
+    skew_join2 = SkewJoinHint(columns=[0])
+    skew_join3 = SkewJoinHint(columns=[{"a": 4}, {"a": 6}])
+    skew_join4 = SkewJoinHint(columns=[{"a": 4, "b": "test"}, {"a": 5, "b": "hello"}])
+
     parameters = [
         {},
         {"how": "left", "right_on": "x", "left_index": True},
+        {
+            "how": "left",
+            "right_on": "x",
+            "left_index": True,
+            "left_hint": mapjoin,
+            "right_hint": mapjoin,
+        },
         {"how": "right", "left_on": "a", "right_index": True},
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "left_hint": mapjoin,
+            "right_hint": dist_mapjoin1,
+        },
         {"how": "left", "left_on": "a", "right_on": "x"},
+        {"how": "left", "left_on": "a", "right_on": "x", "left_hint": skew_join1},
         {"how": "right", "left_on": "a", "right_index": True},
+        {"how": "right", "left_on": "a", "right_index": True, "right_hint": skew_join2},
         {"how": "right", "on": "a"},
+        {"how": "right", "on": "a", "right_hint": skew_join3},
         {"how": "inner", "on": ["a", "b"]},
+        {"how": "inner", "on": ["a", "b"], "left_hint": skew_join4},
     ]

     for kw in parameters:
@@ -213,3 +239,100 @@ def test_concat():
     mdf2 = from_pandas(df2, chunk_size=3)
     r = concat([mdf1, mdf2], join="inner")
     assert r.shape == (20, 3)
+
+
+def test_invalid_join_hint():
+    df1 = pd.DataFrame(
+        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
+    )
+    df2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])
+
+    mdf1 = from_pandas(df1, chunk_size=2)
+    mdf2 = from_pandas(df2, chunk_size=3)
+
+    # type error
+    parameters = [
+        {"how": "left", "right_on": "x", "left_index": True, "left_hint": [1]},
+        {
+            "how": "left",
+            "right_on": "x",
+            "left_index": True,
+            "left_hint": {"key": "value"},
+        },
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "right_hint": SkewJoinHint(columns=2),
+        },
+        {
+            "how": "left",
+            "left_on": "a",
+            "right_on": "x",
+            "left_hint": SkewJoinHint(columns="a"),
+        },
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "right_hint": SkewJoinHint(columns=["0", []]),
+        },
+    ]

+    for kw in parameters:
+        print(kw)
+        with pytest.raises(TypeError):
+            mdf1.merge(mdf2, **kw)
+
+    # value error
+    parameters = [
+        # mapjoin can't work together with skew join
+        {
+            "how": "left",
+            "right_on": "x",
+            "left_index": True,
+            "left_hint": MapJoinHint(),
+            "right_hint": SkewJoinHint(),
+        },
+        # a right join can't apply a skew join hint to the left frame
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "left_hint": SkewJoinHint(),
+        },
+        # invalid columns
+        {
+            "how": "left",
+            "left_on": "a",
+            "right_on": "x",
+            "left_hint": SkewJoinHint(columns=["b"]),
+        },
+        # invalid index level
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "right_hint": SkewJoinHint(columns=[5]),
+        },
+        # unmatched skew join columns
+        {
+            "how": "right",
+            "left_on": "a",
+            "right_index": True,
+            "right_hint": SkewJoinHint(columns=[{0: "value1"}, {1: "value2"}]),
+        },
+        # invalid dist_mapjoin shard_count
+        {"how": "right", "on": "a", "right_hint": DistributedMapJoinHint()},
+        # none of the hints can work with an outer join
+        {"how": "outer", "on": ["a", "b"], "left_hint": MapJoinHint()},
+        {
+            "how": "outer",
+            "on": ["a", "b"],
+            "left_hint": DistributedMapJoinHint(shard_count=5),
+        },
+        {"how": "outer", "on": ["a", "b"], "left_hint": SkewJoinHint()},
+    ]
+    for kw in parameters:
+        with pytest.raises(ValueError):
+            mdf1.merge(mdf2, **kw)
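For orientation: the hints exercised above come from the new MapJoinHint, DistributedMapJoinHint and SkewJoinHint classes added in maxframe/dataframe/merge/merge.py (+236 lines). A minimal usage sketch, assuming the classes are importable from maxframe.dataframe.merge.merge the way this test imports them; the frames and skew values are illustrative, not taken from the release:

import maxframe.dataframe as md
from maxframe.dataframe.merge.merge import (
    DistributedMapJoinHint,
    MapJoinHint,
    SkewJoinHint,
)

left = md.DataFrame({"a": [1, 2, 2, 3], "b": list("wxyz")})
right = md.DataFrame({"a": [1, 2, 3], "c": [10, 20, 30]})

# Broadcast the small right frame to every worker (map join).
r1 = left.merge(right, on="a", right_hint=MapJoinHint())

# Split the broadcast table into shards for a larger right frame.
r2 = left.merge(right, on="a", right_hint=DistributedMapJoinHint(shard_count=5))

# Declare which left-side key values are heavily skewed.
r3 = left.merge(right, on="a", left_hint=SkewJoinHint(columns=[{"a": 2}]))

As the ValueError cases show, the hints are validated when the merge is built: a map join cannot be combined with a skew join on the same merge, and none of the hints apply to outer joins.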
maxframe/dataframe/misc/apply.py
CHANGED

@@ -170,6 +170,8 @@ class ApplyOperator(
         elif self.output_types[0] == OutputType.dataframe:
             shape = [np.nan, np.nan]
             shape[1 - self.axis] = df.shape[1 - self.axis]
+            if self.axis == 1:
+                shape[1] = len(dtypes)
             shape = tuple(shape)
         else:
             shape = (df.shape[1 - self.axis],)
@@ -317,6 +319,7 @@ def df_apply(
     skip_infer=False,
     **kwds,
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/50
     """
     Apply a function along an axis of the DataFrame.

@@ -442,20 +445,12 @@ def df_apply(
     B    27
     dtype: int64

-    >>> df.apply(np.sum, axis=1).execute()
+    >>> df.apply(lambda row: int(np.sum(row)), axis=1).execute()
     0    13
     1    13
     2    13
     dtype: int64

-    Returning a list-like will result in a Series
-
-    >>> df.apply(lambda x: [1, 2], axis=1).execute()
-    0    [1, 2]
-    1    [1, 2]
-    2    [1, 2]
-    dtype: object
-
     Passing ``result_type='expand'`` will expand list-like results
     to columns of a Dataframe

@@ -469,7 +464,7 @@ def df_apply(
     ``result_type='expand'``. The resulting column names
     will be the Series index.

-    >>> df.apply(lambda x:
+    >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1).execute()
        foo  bar
     0    1    2
     1    1    2
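The two added lines fix the inferred output shape when ``dtypes`` is supplied for an ``axis=1`` apply; the new test_apply in test_misc.py further down in this diff asserts exactly this, ``shape == (3, 2)``. A standalone sketch of the shape rule; inferred_shape is a hypothetical helper written for illustration, not part of the operator:

import numpy as np
import pandas as pd

def inferred_shape(df_shape, axis, dtypes):
    # Rows carry over from the input; with axis=1 the column count
    # now follows the declared output dtypes instead of staying NaN.
    shape = [np.nan, np.nan]
    shape[1 - axis] = df_shape[1 - axis]
    if axis == 1:
        shape[1] = len(dtypes)
    return tuple(shape)

# A 3-row, single-column input applied row-wise with two output dtypes:
print(inferred_shape((3, 1), 1, pd.Series(["int64", "int64"])))  # (3, 2)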
maxframe/dataframe/misc/describe.py
CHANGED

@@ -15,7 +15,7 @@
 import numpy as np
 import pandas as pd

-from ... import opcodes
+from ... import opcodes
 from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
 from ..core import SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -23,7 +23,7 @@ from ..utils import build_empty_df, parse_index


 class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
-    _op_type_ =
+    _op_type_ = opcodes.DESCRIBE

     input = KeyField("input", default=None)
     percentiles = ListField("percentiles", FieldTypes.float64, default=None)
maxframe/dataframe/misc/drop_duplicates.py
CHANGED

@@ -37,16 +37,15 @@ class DataFrameDropDuplicates(DuplicateOperand):
             shape += (3,)
         return shape

-
-    def _gen_tileable_params(cls, op: "DataFrameDropDuplicates", input_params):
+    def _gen_tileable_params(self, op: "DataFrameDropDuplicates", input_params):
         params = input_params.copy()
-        if op.ignore_index:
+        if op.ignore_index and self._output_types[0] != OutputType.index:
             params["index_value"] = parse_index(pd.RangeIndex(-1))
         else:
             params["index_value"] = gen_unknown_index_value(
                 input_params["index_value"], op.keep, op.subset, type(op).__name__
             )
-        params["shape"] =
+        params["shape"] = self._get_shape(input_params["shape"], op)
         return params

     def __call__(self, inp, inplace=False):
@@ -105,6 +104,7 @@ def df_drop_duplicates(
 def series_drop_duplicates(
     series, keep="first", inplace=False, ignore_index=False, method="auto"
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/12
     """
     Return Series with duplicate values removed.

@@ -148,27 +148,6 @@ def series_drop_duplicates(
     5     hippo
     Name: animal, dtype: object

-    With the 'keep' parameter, the selection behaviour of duplicated values
-    can be changed. The value 'first' keeps the first occurrence for each
-    set of duplicated entries. The default value of keep is 'first'.
-
-    >>> s.drop_duplicates().execute()
-    0      lame
-    1       cow
-    3    beetle
-    5     hippo
-    Name: animal, dtype: object
-
-    The value 'last' for parameter 'keep' keeps the last occurrence for
-    each set of duplicated entries.
-
-    >>> s.drop_duplicates(keep='last').execute()
-    1       cow
-    3    beetle
-    4      lame
-    5     hippo
-    Name: animal, dtype: object
-
     The value ``False`` for parameter 'keep' discards all sets of
     duplicated entries. Setting the value of 'inplace' to ``True`` performs
     the operation inplace and returns ``None``.
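The new guard on ``ignore_index`` skips the RangeIndex reset when the operator outputs an Index, which has no positional row index to rebuild. For orientation, the two behaviours being modelled, shown with plain pandas:

import pandas as pd

df = pd.DataFrame({"animal": ["lame", "cow", "lame", "beetle"]})

# ignore_index=False keeps the surviving rows' original labels.
print(df.drop_duplicates().index.tolist())                   # [0, 1, 3]

# ignore_index=True renumbers the result with a fresh RangeIndex.
print(df.drop_duplicates(ignore_index=True).index.tolist())  # [0, 1, 2]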
maxframe/dataframe/misc/eval.py
CHANGED

@@ -120,6 +120,10 @@ class CollectionVisitor(ast.NodeVisitor):
         if obj_name in self.env:
             self.referenced_vars.add(obj_name)
             return self.env[obj_name]
+        try:
+            return self.target[obj_name]
+        except KeyError:
+            pass
         raise KeyError(f"name {obj_name} is not defined")

     def visit(self, node):
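With the added fallback, a bare name in an eval expression now resolves first against local variables (``self.env``) and then against the target frame's own columns before a KeyError is raised. A sketch of the user-visible effect, assuming DataFrame.eval in maxframe mirrors the pandas API:

import maxframe.dataframe as md

df = md.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# "a" and "b" are not Python variables here; the visitor now finds
# them on the target frame instead of raising KeyError.
c = df.eval("a + b")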
maxframe/dataframe/misc/memory_usage.py
CHANGED

@@ -58,7 +58,7 @@ class DataFrameMemoryUsage(DataFrameOperator, DataFrameOperatorMixin):
         """
         if df_or_series.ndim == 1:
             # the input data is a series, a Scalar will be returned
-            return self.new_scalar([df_or_series], dtype=np.dtype(
+            return self.new_scalar([df_or_series], dtype=np.dtype(int))
         else:
             # the input data is a DataFrame, a Series will be returned
             # calculate shape of returning series given ``op.index``
@@ -71,7 +71,7 @@ class DataFrameMemoryUsage(DataFrameOperator, DataFrameOperatorMixin):
             [df_or_series],
             index_value=self._adapt_index(df_or_series.columns_value),
             shape=new_shape,
-            dtype=np.dtype(
+            dtype=np.dtype(int),
         )

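The ``dtype`` arguments here are declared as ``np.dtype(int)``, matching the integer byte counts that memory_usage reports. The pandas behaviour the operator mirrors:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

print(df.memory_usage())       # DataFrame input: one integer count per column
print(df["a"].memory_usage())  # Series input: a single integer scalar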
maxframe/dataframe/misc/pct_change.py
CHANGED

@@ -18,6 +18,7 @@ from ..utils import validate_axis
 def pct_change(
     df_or_series, periods=1, fill_method="pad", limit=None, freq=None, **kwargs
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/32
     """
     Percentage change between the current and a prior element.

@@ -50,89 +51,6 @@ def pct_change(
     DataFrame.diff : Compute the difference of two elements in a DataFrame.
     Series.shift : Shift the index by some number of periods.
     DataFrame.shift : Shift the index by some number of periods.
-
-    Examples
-    --------
-    **Series**
-
-    >>> import maxframe.dataframe as md
-
-    >>> s = md.Series([90, 91, 85])
-    >>> s.execute()
-    0    90
-    1    91
-    2    85
-    dtype: int64
-
-    >>> s.pct_change().execute()
-    0         NaN
-    1    0.011111
-    2   -0.065934
-    dtype: float64
-
-    >>> s.pct_change(periods=2).execute()
-    0         NaN
-    1         NaN
-    2   -0.055556
-    dtype: float64
-
-    See the percentage change in a Series where filling NAs with last
-    valid observation forward to next valid.
-
-    >>> s = md.Series([90, 91, None, 85])
-    >>> s.execute()
-    0    90.0
-    1    91.0
-    2     NaN
-    3    85.0
-    dtype: float64
-
-    >>> s.pct_change(fill_method='ffill').execute()
-    0         NaN
-    1    0.011111
-    2    0.000000
-    3   -0.065934
-    dtype: float64
-
-    **DataFrame**
-
-    Percentage change in French franc, Deutsche Mark, and Italian lira from
-    1980-01-01 to 1980-03-01.
-
-    >>> df = md.DataFrame({
-    ...     'FR': [4.0405, 4.0963, 4.3149],
-    ...     'GR': [1.7246, 1.7482, 1.8519],
-    ...     'IT': [804.74, 810.01, 860.13]},
-    ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
-    >>> df.execute()
-                    FR      GR      IT
-    1980-01-01  4.0405  1.7246  804.74
-    1980-02-01  4.0963  1.7482  810.01
-    1980-03-01  4.3149  1.8519  860.13
-
-    >>> df.pct_change().execute()
-                      FR        GR        IT
-    1980-01-01       NaN       NaN       NaN
-    1980-02-01  0.013810  0.013684  0.006549
-    1980-03-01  0.053365  0.059318  0.061876
-
-    Percentage of change in GOOG and APPL stock volume. Shows computing
-    the percentage change between columns.
-
-    >>> df = md.DataFrame({
-    ...     '2016': [1769950, 30586265],
-    ...     '2015': [1500923, 40912316],
-    ...     '2014': [1371819, 41403351]},
-    ...     index=['GOOG', 'APPL'])
-    >>> df.execute()
-              2016      2015      2014
-    GOOG   1769950   1500923   1371819
-    APPL  30586265  40912316  41403351
-
-    >>> df.pct_change(axis='columns').execute()
-          2016      2015      2014
-    GOOG   NaN -0.151997 -0.086016
-    APPL   NaN  0.337604  0.012002
     """

     axis = validate_axis(kwargs.pop("axis", 0))
maxframe/dataframe/misc/tests/test_misc.py
CHANGED

@@ -18,6 +18,7 @@ import pytest

 from .... import opcodes
 from ....core import OutputType
+from ....dataframe import DataFrame
 from ....tensor.core import TENSOR_TYPE
 from ... import eval as maxframe_eval
 from ... import get_dummies, to_numeric
@@ -430,6 +431,28 @@ def test_case_when():
     assert isinstance(col.inputs[2].op, DataFrameGreater)


+def test_apply():
+    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
+
+    keys = [1, 2]
+
+    def f(x, keys):
+        if x["a"] in keys:
+            return [1, 0]
+        else:
+            return [0, 1]
+
+    apply_df = df[["a"]].apply(
+        f,
+        output_type="dataframe",
+        dtypes=pd.Series(["int64", "int64"]),
+        axis=1,
+        result_type="expand",
+        keys=keys,
+    )
+    assert apply_df.shape == (3, 2)
+
+
 def test_pivot_table():
     from ...groupby.aggregation import DataFrameGroupByAgg
     from ...misc.pivot_table import DataFramePivotTable
maxframe/dataframe/misc/transform.py
CHANGED

@@ -228,21 +228,6 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
     0  1  2
     1  2  3
     2  3  4
-
-    Even though the resulting DataFrame must have the same length as the
-    input DataFrame, it is possible to provide several input functions:
-
-    >>> s = md.Series(range(3))
-    >>> s.execute()
-    0    0
-    1    1
-    2    2
-    dtype: int64
-    >>> s.transform([mt.sqrt, mt.exp]).execute()
-           sqrt        exp
-    0  0.000000   1.000000
-    1  1.000000   2.718282
-    2  1.414214   7.389056
     """
     op = TransformOperator(
         func=func,
@@ -265,6 +250,7 @@ def series_transform(
     dtype=None,
     **kwargs
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/10
     """
     Call ``func`` on self producing a Series with transformed values.

@@ -332,21 +318,6 @@ def series_transform(
     0  1  2
     1  2  3
     2  3  4
-
-    Even though the resulting Series must have the same length as the
-    input Series, it is possible to provide several input functions:
-
-    >>> s = md.Series(range(3))
-    >>> s.execute()
-    0    0
-    1    1
-    2    2
-    dtype: int64
-    >>> s.transform([mt.sqrt, mt.exp]).execute()
-           sqrt        exp
-    0  0.000000   1.000000
-    1  1.000000   2.718282
-    2  1.414214   7.389056
     """
     op = TransformOperator(
         func=func,
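Both docstring edits drop the list-of-functions examples (``s.transform([mt.sqrt, mt.exp])``), leaving only single-callable usage documented. A minimal sketch of the remaining documented form; the frame and lambda are illustrative:

import maxframe.dataframe as md

df = md.DataFrame({"A": range(3), "B": range(1, 4)})
# A single callable must produce output of the same length as the input.
r = df.transform(lambda x: x + 1)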
maxframe/dataframe/misc/value_counts.py
CHANGED

@@ -85,6 +85,7 @@ def value_counts(
     dropna=True,
     method="auto",
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/33
     """
     Return a Series containing counts of unique values.

@@ -125,9 +126,8 @@ def value_counts(
     Examples
     --------
     >>> import maxframe.dataframe as md
-    >>> import
-
-    >>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
+    >>> import numpy as np
+    >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
     >>> s.value_counts().execute()
     3.0    2
     4.0    1
@@ -138,7 +138,7 @@ def value_counts(
     With `normalize` set to `True`, returns the relative frequency by
     dividing all values by the sum of values.

-    >>> s = md.Series([3, 1, 2, 3, 4,
+    >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
     >>> s.value_counts(normalize=True).execute()
     3.0    0.4
     4.0    0.2
@@ -146,19 +146,6 @@ def value_counts(
     1.0    0.2
     dtype: float64

-    **bins**
-
-    Bins can be useful for going from a continuous variable to a
-    categorical variable; instead of counting unique
-    apparitions of values, divide the index in the specified
-    number of half-open bins.
-
-    >>> s.value_counts(bins=3).execute()
-    (2.0, 3.0]      2
-    (0.996, 2.0]    2
-    (3.0, 4.0]      1
-    dtype: int64
-
     **dropna**

     With `dropna` set to `False` we can also see NaN index values.
maxframe/dataframe/missing/dropna.py
CHANGED

@@ -234,7 +234,7 @@ def series_dropna(series, axis=0, inplace=False, how=None):
     Empty strings are not considered NA values. ``None`` is considered an
     NA value.

-    >>> ser = md.Series([np.NaN, 2, md.NaT, '', None, 'I stay'])
+    >>> ser = md.Series([np.NaN, '2', md.NaT, '', None, 'I stay'])
     >>> ser.execute()
     0       NaN
     1         2
maxframe/dataframe/missing/fillna.py
CHANGED

@@ -132,11 +132,11 @@ def fillna(
     --------
     >>> import maxframe.tensor as mt
     >>> import maxframe.dataframe as md
-    >>> df = md.DataFrame([[
-
-
-
-
+    >>> df = md.DataFrame([[np.nan, 2, np.nan, 0],
+    ...                    [3, 4, np.nan, 1],
+    ...                    [np.nan, np.nan, np.nan, 5],
+    ...                    [np.nan, 3, np.nan, 4]],
+    ...                    columns=list('ABCD'))
     >>> df.execute()
          A    B   C  D
     0  NaN  2.0 NaN  0
maxframe/dataframe/sort/sort_values.py
CHANGED

@@ -67,6 +67,7 @@ def dataframe_sort_values(
     parallel_kind="PSRS",
     psrs_kinds=None,
 ):
+    # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/15
     """
     Sort by the values along either axis.

@@ -152,17 +153,6 @@ def dataframe_sort_values(
     0    A     2     0
     1    A     1     1
     3  NaN     8     4
-
-    Putting NAs first
-
-    >>> df.sort_values(by='col1', ascending=False, na_position='first').execute()
-      col1  col2  col3
-    3  NaN     8     4
-    4    D     7     2
-    5    C     4     3
-    2    B     9     9
-    0    A     2     0
-    1    A     1     1
     """

     if na_position not in ["last", "first"]:  # pragma: no cover
maxframe/dataframe/statistics/corr.py
CHANGED

@@ -43,7 +43,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
     def __call__(self, df_or_series):
         if isinstance(df_or_series, SERIES_TYPE):
             inputs = filter_inputs([df_or_series, self.other])
-            return self.new_scalar(inputs, dtype=np.dtype(
+            return self.new_scalar(inputs, dtype=np.dtype(float))
         else:

             def _filter_numeric(obj):
@@ -60,7 +60,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
             inputs = filter_inputs([df_or_series, self.other])
             if self.axis is None:
                 dtypes = pd.Series(
-                    [np.dtype(
+                    [np.dtype(float)] * len(df_or_series.dtypes),
                     index=df_or_series.dtypes.index,
                 )
                 return self.new_dataframe(
@@ -85,7 +85,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
             return self.new_series(
                 inputs,
                 shape=shape,
-                dtype=np.dtype(
+                dtype=np.dtype(float),
                 index_value=new_index_value,
             )

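All three completed ``dtype`` arguments declare ``np.dtype(float)``, which matches how correlation results behave in pandas regardless of the input dtypes:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
print(df.corr().dtypes)       # float64 for every column
print(df["a"].corr(df["b"]))  # a float scalar, -1.0 for this data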
maxframe/dataframe/statistics/quantile.py
CHANGED

@@ -14,8 +14,9 @@

 import numpy as np
 import pandas as pd
+from pandas.core.dtypes.cast import find_common_type

-from ... import opcodes
+from ... import opcodes
 from ...core import ENTITY_TYPE
 from ...serialization.serializables import (
     AnyField,
@@ -32,11 +33,11 @@ from ...tensor.datasource import tensor as astensor
 from ...tensor.statistics.quantile import quantile as tensor_quantile
 from ..core import DATAFRAME_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
-from ..utils import build_empty_df,
+from ..utils import build_empty_df, parse_index, validate_axis


 class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
-    _op_type_ =
+    _op_type_ = opcodes.QUANTILE

     input = KeyField("input", default=None)
     q = AnyField("q", default=None)
@@ -259,6 +260,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):


 def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
+    # FIXME: Timedelta not supported. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
     """
     Return values at the given quantile over requested axis.

@@ -309,20 +311,6 @@ def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="line
            a     b
     0.1  1.3   3.7
     0.5  2.5  55.0
-
-    Specifying `numeric_only=False` will also compute the quantile of
-    datetime and timedelta data.
-
-    >>> df = md.DataFrame({'A': [1, 2],
-    ...                    'B': [md.Timestamp('2010'),
-    ...                          md.Timestamp('2011')],
-    ...                    'C': [md.Timedelta('1 days'),
-    ...                          md.Timedelta('2 days')]})
-    >>> df.quantile(0.5, numeric_only=False).execute()
-    A                    1.5
-    B    2010-07-02 12:00:00
-    C        1 days 12:00:00
-    Name: 0.5, dtype: object
     """
     if isinstance(q, ENTITY_TYPE):
         q = astensor(q)
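The new ``find_common_type`` import suggests the quantile result dtype is now derived as a common type across the participating columns, while the FIXME records that timedelta data still fails on the service side. For reference, how that pandas helper behaves (illustrative inputs):

import numpy as np
from pandas.core.dtypes.cast import find_common_type

# The narrowest dtype that can hold values of both inputs.
print(find_common_type([np.dtype("int64"), np.dtype("float32")]))  # float64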