maxframe 1.0.0rc1-cp38-cp38-win32.whl → 1.0.0rc2-cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic.

Files changed (64)
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/codegen.py +0 -4
  3. maxframe/config/config.py +34 -2
  4. maxframe/config/validators.py +1 -0
  5. maxframe/conftest.py +2 -0
  6. maxframe/core/entity/objects.py +1 -1
  7. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  8. maxframe/dataframe/__init__.py +1 -1
  9. maxframe/dataframe/arithmetic/around.py +5 -17
  10. maxframe/dataframe/arithmetic/core.py +15 -7
  11. maxframe/dataframe/arithmetic/docstring.py +5 -55
  12. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  13. maxframe/dataframe/core.py +5 -5
  14. maxframe/dataframe/datasource/date_range.py +2 -2
  15. maxframe/dataframe/datasource/read_odps_query.py +6 -0
  16. maxframe/dataframe/datasource/read_odps_table.py +2 -1
  17. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  18. maxframe/dataframe/groupby/cum.py +0 -1
  19. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  20. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  21. maxframe/dataframe/indexing/rename.py +3 -37
  22. maxframe/dataframe/indexing/sample.py +0 -1
  23. maxframe/dataframe/indexing/set_index.py +68 -1
  24. maxframe/dataframe/merge/merge.py +236 -2
  25. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  26. maxframe/dataframe/misc/apply.py +3 -10
  27. maxframe/dataframe/misc/case_when.py +1 -1
  28. maxframe/dataframe/misc/describe.py +2 -2
  29. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  30. maxframe/dataframe/misc/eval.py +4 -0
  31. maxframe/dataframe/misc/pct_change.py +1 -83
  32. maxframe/dataframe/misc/transform.py +1 -30
  33. maxframe/dataframe/misc/value_counts.py +4 -17
  34. maxframe/dataframe/missing/dropna.py +1 -1
  35. maxframe/dataframe/missing/fillna.py +5 -5
  36. maxframe/dataframe/sort/sort_values.py +1 -11
  37. maxframe/dataframe/statistics/quantile.py +5 -17
  38. maxframe/dataframe/utils.py +4 -7
  39. maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
  40. maxframe/learn/contrib/xgboost/predict.py +2 -2
  41. maxframe/learn/contrib/xgboost/train.py +2 -2
  42. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  43. maxframe/odpsio/__init__.py +1 -1
  44. maxframe/odpsio/arrow.py +8 -4
  45. maxframe/odpsio/schema.py +10 -7
  46. maxframe/odpsio/tableio.py +388 -14
  47. maxframe/odpsio/tests/test_schema.py +16 -15
  48. maxframe/odpsio/tests/test_tableio.py +48 -21
  49. maxframe/protocol.py +40 -2
  50. maxframe/serialization/core.cp38-win32.pyd +0 -0
  51. maxframe/serialization/serializables/core.py +48 -9
  52. maxframe/tensor/__init__.py +59 -0
  53. maxframe/tensor/base/unique.py +2 -2
  54. maxframe/tensor/statistics/quantile.py +2 -2
  55. maxframe/tests/utils.py +11 -2
  56. maxframe/utils.py +17 -9
  57. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +74 -1
  58. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +64 -64
  59. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +1 -1
  60. maxframe_client/fetcher.py +38 -27
  61. maxframe_client/session/odps.py +5 -5
  62. maxframe_client/tests/test_fetcher.py +21 -3
  63. maxframe_client/tests/test_session.py +13 -2
  64. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0
maxframe/_utils.cp38-win32.pyd CHANGED
Binary file
maxframe/codegen.py CHANGED
@@ -205,12 +205,8 @@ class BigDagCodeContext(metaclass=abc.ABCMeta):
         return self._session_id
 
     def register_udf(self, udf: AbstractUDF):
-        from maxframe_framedriver.services.session import SessionManager
-
         udf.session_id = self._session_id
         self._udfs[udf.name] = udf
-        if self._session_id and SessionManager.initialized():
-            SessionManager.instance().register_udf(self._session_id, udf)
 
     def get_udfs(self) -> List[AbstractUDF]:
         return list(self._udfs.values())
maxframe/config/config.py CHANGED
@@ -19,6 +19,15 @@ import warnings
 from copy import deepcopy
 from typing import Any, Dict, Optional, Union
 
+from odps.lib import tzlocal
+
+try:
+    from zoneinfo import available_timezones
+except ImportError:
+    from pytz import all_timezones
+
+    available_timezones = lambda: all_timezones
+
 from ..utils import get_python_tag
 from .validators import (
     ValidatorType,
@@ -28,6 +37,7 @@ from .validators import (
     is_dict,
     is_in,
     is_integer,
+    is_non_negative_integer,
     is_null,
     is_numeric,
     is_string,
@@ -37,11 +47,12 @@ _DEFAULT_REDIRECT_WARN = "Option {source} has been replaced by {target} and migh
 _DEFAULT_MAX_ALIVE_SECONDS = 3 * 24 * 3600
 _DEFAULT_MAX_IDLE_SECONDS = 3600
 _DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS = 120
+_DEFAULT_SPE_FAILURE_RETRY_TIMES = 5
 _DEFAULT_UPLOAD_BATCH_SIZE = 4096
 _DEFAULT_TEMP_LIFECYCLE = 1
 _DEFAULT_TASK_START_TIMEOUT = 60
 _DEFAULT_TASK_RESTART_TIMEOUT = 300
-_DEFAULT_LOGVIEW_HOURS = 24 * 60
+_DEFAULT_LOGVIEW_HOURS = 24 * 30
 
 
 class OptionError(Exception):
@@ -297,13 +308,28 @@ class Config:
         return {k: v for k, v in res.items() if k in self._remote_options}
 
 
+def _get_legal_local_tz_name() -> Optional[str]:
+    """Sometimes we may get illegal tz name from tzlocal.get_localzone()"""
+    tz_name = str(tzlocal.get_localzone())
+    if tz_name not in available_timezones():
+        return None
+    return tz_name
+
+
 default_options = Config()
 default_options.register_option(
     "execution_mode", "trigger", validator=is_in(["trigger", "eager"])
 )
+default_options.register_option("use_common_table", False, validator=is_bool)
 default_options.register_option(
     "python_tag", get_python_tag(), validator=is_string, remote=True
 )
+default_options.register_option(
+    "local_timezone",
+    _get_legal_local_tz_name(),
+    validator=any_validator(is_null, is_in(set(available_timezones()))),
+    remote=True,
+)
 default_options.register_option(
     "session.logview_hours", _DEFAULT_LOGVIEW_HOURS, validator=is_integer, remote=True
 )
@@ -378,7 +404,13 @@ default_options.register_option(
 default_options.register_option(
     "spe.operation_timeout_seconds",
     _DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS,
-    validator=is_integer,
+    validator=is_non_negative_integer,
+    remote=True,
+)
+default_options.register_option(
+    "spe.failure_retry_times",
+    _DEFAULT_SPE_FAILURE_RETRY_TIMES,
+    validator=is_non_negative_integer,
     remote=True,
 )
 
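Note on the local_timezone option added above: the zoneinfo/pytz fallback and the tz-name guard can be exercised on their own. The following is a minimal standalone sketch; it swaps the vendored odps.lib.tzlocal for the standalone tzlocal package and renames the helper (both assumptions), so treat it as illustrative rather than the package's code.

from typing import Optional

try:
    from zoneinfo import available_timezones  # Python 3.9+
except ImportError:
    from pytz import all_timezones  # fallback for older interpreters

    available_timezones = lambda: all_timezones

import tzlocal  # stand-in for the vendored odps.lib.tzlocal used in the diff


def get_legal_local_tz_name() -> Optional[str]:
    # tzlocal may report a name (e.g. some Windows zones) that zoneinfo/pytz
    # does not know; treat those as "no usable local timezone" and return None.
    tz_name = str(tzlocal.get_localzone())
    if tz_name not in available_timezones():
        return None
    return tz_name


print(get_legal_local_tz_name())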
maxframe/config/validators.py CHANGED
@@ -40,6 +40,7 @@ is_numeric = lambda x: isinstance(x, (int, float))
 is_string = lambda x: isinstance(x, str)
 is_dict = lambda x: isinstance(x, dict)
 is_positive_integer = lambda x: is_integer(x) and x > 0
+is_non_negative_integer = lambda x: is_integer(x) and x >= 0
 
 
 def is_in(vals):
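The new validator only relaxes the lower bound; a quick illustration of the difference, with the lambdas copied from the diff:

is_integer = lambda x: isinstance(x, int)
is_positive_integer = lambda x: is_integer(x) and x > 0
is_non_negative_integer = lambda x: is_integer(x) and x >= 0

# 0 now passes for options validated with the new lambda
# (spe.operation_timeout_seconds, spe.failure_retry_times),
# while negative values are still rejected.
assert is_non_negative_integer(0)
assert not is_non_negative_integer(-1)
assert not is_positive_integer(0)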
maxframe/conftest.py CHANGED
@@ -87,6 +87,7 @@ def oss_config():
         oss_secret_access_key = config.get("oss", "secret_access_key")
         oss_bucket_name = config.get("oss", "bucket_name")
         oss_endpoint = config.get("oss", "endpoint")
+        oss_rolearn = config.get("oss", "rolearn")
 
         config.oss_config = (
             oss_access_id,
@@ -99,6 +100,7 @@ def oss_config():
 
         auth = oss2.Auth(oss_access_id, oss_secret_access_key)
         config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
+        config.oss_rolearn = oss_rolearn
         return config
     except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
         return None
maxframe/core/entity/objects.py CHANGED
@@ -23,7 +23,7 @@ class ObjectData(TileableData, _ToObjectMixin):
     __slots__ = ()
     type_name = "Object"
     # workaround for removed field since v0.1.0b5
-    # todo remove this when all versions below v0.1.0b5 is eliminated
+    # todo remove this when all versions below v1.0.0rc1 is eliminated
     _legacy_deprecated_non_primitives = ["_chunks"]
 
     def __init__(self, op=None, nsplits=None, **kw):
maxframe/core/graph/core.cp38-win32.pyd CHANGED
Binary file
maxframe/dataframe/__init__.py CHANGED
@@ -54,7 +54,7 @@ from .reduction import CustomReduction, unique
 from .tseries.to_datetime import to_datetime
 
 try:
-    from pandas import NA, Timestamp
+    from pandas import NA, NaT, Timestamp
 except ImportError:  # pragma: no cover
     pass
 
maxframe/dataframe/arithmetic/around.py CHANGED
@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
     return op(df)
 
 
+# FIXME Series input of decimals not supported yet
 around.__frame_doc__ = """
 Round a DataFrame to a variable number of decimal places.
 
 Parameters
 ----------
-decimals : int, dict, Series
+decimals : int, dict
     Number of decimal places to round each column to. If an int is
     given, round each column to the same number of places.
     Otherwise dict and Series round to variable numbers of places.
     Column names should be in the keys if `decimals` is a
-    dict-like, or in the index if `decimals` is a Series. Any
-    columns not included in `decimals` will be left as is. Elements
-    of `decimals` which are not columns of the input will be
-    ignored.
+    dict-like. Any columns not included in `decimals` will be left
+    as is. Elements of `decimals` which are not columns of the
+    input will be ignored.
 *args
     Additional keywords have no effect but might be accepted for
     compatibility with numpy.
@@ -107,18 +107,6 @@ places as value
 1   0.0   1.0
 2   0.7   0.0
 3   0.2   0.0
-
-Using a Series, the number of places for specific columns can be
-specified with the column names as index and the number of
-decimal places as value
-
->>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
->>> df.round(decimals).execute()
-   dogs  cats
-0   0.2   0.0
-1   0.0   1.0
-2   0.7   0.0
-3   0.2   0.0
 """
 around.__series_doc__ = """
 Round each value in a Series to the given number of decimals.
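For reference, the dict form that the docstring keeps documenting behaves the same way in plain pandas; a small sketch using the docstring's own data, illustrative only:

import pandas as pd

df = pd.DataFrame(
    [(0.21, 0.32), (0.01, 0.67), (0.66, 0.03), (0.21, 0.18)],
    columns=["dogs", "cats"],
)
# Per-column precision through a dict keyed by column name still works;
# passing a Series of decimals is the case the FIXME above marks as unsupported.
print(df.round({"dogs": 1, "cats": 0}))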
maxframe/dataframe/arithmetic/core.py CHANGED
@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         raise NotImplementedError
 
     @classmethod
-    def _calc_properties(cls, x1, x2=None, axis="columns"):
+    def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
         if isinstance(x1, DATAFRAME_TYPE) and (
             x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
         ):
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                 index = copy.copy(x1.index_value)
                 index_shape = x1.shape[0]
             else:
-                index = infer_index_value(x1.index_value, x2.index_value)
+                index = infer_index_value(
+                    x1.index_value, x2.index_value, level=level
+                )
             if index.key == x1.index_value.key == x2.index_value.key and (
                 not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
             ):
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                 column_shape = len(dtypes)
             else:  # pragma: no cover
                 dtypes = x1.dtypes  # FIXME
-                columns = infer_index_value(x1.columns_value, x2.index_value)
+                columns = infer_index_value(
+                    x1.columns_value, x2.index_value, level=level
+                )
                 column_shape = np.nan
         else:
             assert axis == "index" or axis == 0
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                 ],
                 index=x1.dtypes.index,
             )
-            index = infer_index_value(x1.index_value, x2.index_value)
+            index = infer_index_value(
+                x1.index_value, x2.index_value, level=level
+            )
             index_shape = np.nan
         return {
             "shape": (index_shape, column_shape),
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
             index = copy.copy(x1.index_value)
             index_shape = x1.shape[0]
         else:
-            index = infer_index_value(x1.index_value, x2.index_value)
+            index = infer_index_value(
+                x1.index_value, x2.index_value, level=level
+            )
         if index.key == x1.index_value.key == x2.index_value.key and (
             not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
         ):
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         self._check_inputs(x1, x2)
         if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
             df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
-            kw = self._calc_properties(df1, df2, axis=self.axis)
+            kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
            if not pd.api.types.is_scalar(df2):
                return self.new_dataframe([x1, x2], **kw)
            else:
                return self.new_dataframe([df1], **kw)
        if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
            s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
-            kw = self._calc_properties(s1, s2)
+            kw = self._calc_properties(s1, s2, level=self.level)
            if not pd.api.types.is_scalar(s2):
                return self.new_series([x1, x2], **kw)
            else:
maxframe/dataframe/arithmetic/docstring.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
 _flex_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Equivalent to ``{equiv}``, but with support to substitute a fill_value
@@ -127,44 +128,15 @@ circle 0
 triangle        3
 rectangle       4
 
->>> (df * other).execute()
-           angles  degrees
-circle          0      NaN
-triangle        9      NaN
-rectangle      16      NaN
-
 >>> df.mul(other, fill_value=0).execute()
            angles  degrees
 circle          0      0.0
 triangle        9      0.0
 rectangle      16      0.0
 
-Divide by a MultiIndex by level.
-
->>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
-...                              'degrees': [360, 180, 360, 360, 540, 720]}},
-...                             index=[['A', 'A', 'A', 'B', 'B', 'B'],
-...                                    ['circle', 'triangle', 'rectangle',
-...                                     'square', 'pentagon', 'hexagon']])
->>> df_multindex.execute()
-             angles  degrees
-A circle          0      360
-  triangle        3      180
-  rectangle       4      360
-B square          4      360
-  pentagon        5      540
-  hexagon         6      720
-
->>> df.div(df_multindex, level=1, fill_value=0).execute()
-             angles  degrees
-A circle        NaN      1.0
-  triangle      1.0      1.0
-  rectangle     1.0      1.0
-B square        0.0      0.0
-  pentagon      0.0      0.0
-  hexagon       0.0      0.0
 """
 
+# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
 _flex_doc_SERIES = """
 Return {desc} of series and other, element-wise (binary operator `{op_name}`).
 
@@ -213,6 +185,7 @@ e NaN
 dtype: float64
 """
 
+# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
 _flex_comp_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
@@ -257,7 +230,8 @@ Mismatched indices will be unioned together.
 
 Examples
 --------
->>> df = pd.DataFrame({{'cost': [250, 150, 100],
+>>> import maxframe.dataframe as md
+>>> df = md.DataFrame({{'cost': [250, 150, 100],
 ...                    'revenue': [100, 250, 300]}},
 ...                   index=['A', 'B', 'C'])
 >>> df.execute()
@@ -332,30 +306,6 @@ A False False
 B  False  False
 C  False   True
 D  False  False
-
-Compare to a MultiIndex by level.
-
->>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
-...                              'revenue': [100, 250, 300, 200, 175, 225]}},
-...                             index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
-...                                    ['A', 'B', 'C', 'A', 'B', 'C']])
->>> df_multindex.execute()
-      cost  revenue
-Q1 A   250      100
-   B   150      250
-   C   100      300
-Q2 A   150      200
-   B   300      175
-   C   220      225
-
->>> df.le(df_multindex, level=1).execute()
-      cost  revenue
-Q1 A   True     True
-   B   True     True
-   C   True     True
-Q2 A  False     True
-   B   True    False
-   C   True    False
 """
 
 
maxframe/dataframe/arithmetic/tests/test_arithmetic.py CHANGED
@@ -239,6 +239,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
     assert df2.columns_value.key != df1.columns_value.key
 
 
+@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
+def test_dataframe_and_series_with_multiindex(func_name, func_opts):
+    data1 = pd.DataFrame(
+        np.random.rand(10, 10),
+        index=pd.MultiIndex.from_arrays(
+            [list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
+        ),
+        columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
+    )
+    data1 = to_boolean_if_needed(func_opts.func_name, data1)
+    df1 = from_pandas(data1, chunk_size=5)
+    s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
+
+    df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
+
+    # test df2's index and columns
+    assert df2.shape == (np.nan, df1.shape[1])
+    assert df2.index_value.key != df1.index_value.key
+    assert df2.index_value.names == df1.index_value.names
+    assert df2.columns_value.key == df1.columns_value.key
+
+
 @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
 def test_series_and_series_with_align_map(func_name, func_opts):
     data1 = pd.DataFrame(
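The level/axis combination exercised by the new test mirrors pandas alignment semantics. A plain-pandas sketch of what level=1, axis=0 means; the data here is hypothetical and not taken from the test:

import pandas as pd

df = pd.DataFrame(
    {"x": [1, 2, 3, 4]},
    index=pd.MultiIndex.from_arrays([list("AABB"), [0, 1, 0, 1]]),
)
s = pd.Series([10, 100], index=[0, 1])

# Broadcast the Series across level 1 of the row MultiIndex, analogous to
# getattr(df1, func_name)(s1, level=1, axis=0) in the MaxFrame test above.
print(df.mul(s, level=1, axis=0))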
maxframe/dataframe/core.py CHANGED
@@ -1086,11 +1086,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim.execute()
+        >>> s.ndim
         1
 
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim.execute()
+        >>> df.ndim
         2
         """
         return super().ndim
@@ -1520,7 +1520,7 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
         self._columns_value = parse_index(dtypes.index, store_data=True)
         self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
         new_shape = list(self._shape)
-        new_shape[0] = len(dtypes)
+        new_shape[-1] = len(dtypes)
         self._shape = tuple(new_shape)
 
     @property
@@ -1761,11 +1761,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim.execute()
+        >>> s.ndim
         1
 
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim.execute()
+        >>> df.ndim
         2
         """
         return super().ndim
maxframe/dataframe/datasource/date_range.py CHANGED
@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
 from pandas.tseries.frequencies import to_offset
 from pandas.tseries.offsets import Tick
 
-from ... import opcodes as OperandDef
+from ... import opcodes
 from ...core import OutputType
 from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
 from ...utils import no_default, pd_release_version
@@ -117,7 +117,7 @@ def generate_range_count(
 
 
 class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
-    _op_type_ = OperandDef.DATE_RANGE
+    _op_type_ = opcodes.DATE_RANGE
 
     start = AnyField("start")
     end = AnyField("end")
maxframe/dataframe/datasource/read_odps_query.py CHANGED
@@ -47,6 +47,7 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
     re.MULTILINE,
 )
 _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
+_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d+$")
 
 
 @dataclasses.dataclass
@@ -272,6 +273,11 @@ def read_odps_query(
     explain_str = list(inst.get_task_results().values())[0]
 
     odps_schema = _parse_explained_schema(explain_str)
+
+    for col in odps_schema.columns:
+        if _ANONYMOUS_COL_REGEX.match(col.name) and col.name not in query:
+            raise ValueError("Need to specify names for all columns in SELECT clause.")
+
     dtypes = odps_schema_to_pandas_dtypes(odps_schema)
 
     if not index_col:
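The anonymous-column check added above can be illustrated without a MaxCompute connection. has_unnamed_result_columns below is a hypothetical helper that mirrors the diff's logic; only the regex is taken verbatim:

import re

_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d+$")


def has_unnamed_result_columns(column_names, query):
    # A column whose name was auto-generated by the engine (_c0, _c1, ...)
    # and that does not appear literally in the query text has no user alias.
    return any(
        _ANONYMOUS_COL_REGEX.match(name) and name not in query
        for name in column_names
    )


# `col2 + col3` comes back as an auto-named column such as _c1, so the check trips:
assert has_unnamed_result_columns(["col1", "_c1"], "SELECT col1, col2 + col3 FROM t")
# An explicit alias avoids the ValueError raised in read_odps_query:
assert not has_unnamed_result_columns(
    ["col1", "total"], "SELECT col1, col2 + col3 AS total FROM t"
)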
maxframe/dataframe/datasource/read_odps_table.py CHANGED
@@ -119,9 +119,10 @@ class DataFrameReadODPSTable(
         return self.new_tileable(
             [],
             None,
-            shape=shape,
+            shape=shape[:1],
             name=getattr(index_value, "name", None),
             names=getattr(index_value, "names", None),
+            dtype=self.index_dtypes.iloc[0],
             index_value=index_value,
             chunk_bytes=chunk_bytes,
             chunk_size=chunk_size,
maxframe/dataframe/datasource/tests/test_datasource.py CHANGED
@@ -21,6 +21,7 @@ import pytest
 from odps import ODPS
 
 from .... import tensor as mt
+from ....core import OutputType
 from ....tests.utils import tn
 from ....utils import lazy_import
 from ... import read_odps_query, read_odps_table
@@ -295,6 +296,15 @@ def test_from_odps_table():
         ),
     )
 
+    out_idx = read_odps_table(
+        test_table,
+        columns=[],
+        index_col=["col1", "col2"],
+        output_type=OutputType.index,
+    )
+    assert out_idx.names == ["col1", "col2"]
+    assert out_idx.shape == (np.nan,)
+
     test_table.drop()
     test_parted_table.drop()
 
@@ -319,6 +329,10 @@ def test_from_odps_query():
         read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
     assert "instant query" in err_info.value.args[0]
 
+    with pytest.raises(ValueError) as err_info:
+        read_odps_query(f"SELECT col1, col2 + col3 FROM {table1_name}")
+    assert "names" in err_info.value.args[0]
+
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
     df = read_odps_query(query1)
     assert df.op.query == query1
maxframe/dataframe/groupby/cum.py CHANGED
@@ -59,7 +59,6 @@ class GroupByCumReductionOperator(DataFrameOperatorMixin, DataFrameOperator):
         out_dtypes = self._calc_out_dtypes(groupby)
 
         kw = in_df.params.copy()
-        kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
         if self.output_types[0] == OutputType.dataframe:
             kw.update(
                 dict(
maxframe/dataframe/groupby/tests/test_groupby.py CHANGED
@@ -282,14 +282,17 @@ def test_groupby_cum():
         r = getattr(mdf.groupby("b"), fun)()
         assert r.op.output_types[0] == OutputType.dataframe
         assert r.shape == (len(df1), 2)
+        assert r.index_value.key == mdf.index_value.key
 
         r = getattr(mdf.groupby("b"), fun)(axis=1)
         assert r.op.output_types[0] == OutputType.dataframe
         assert r.shape == (len(df1), 3)
+        assert r.index_value.key == mdf.index_value.key
 
     r = mdf.groupby("b").cumcount()
     assert r.op.output_types[0] == OutputType.series
     assert r.shape == (len(df1),)
+    assert r.index_value.key == mdf.index_value.key
 
     series1 = pd.Series([2, 2, 5, 7, 3, 7, 8, 8, 5, 6])
     ms1 = md.Series(series1, chunk_size=3)
@@ -298,6 +301,7 @@ def test_groupby_cum():
         r = getattr(ms1.groupby(lambda x: x % 2), fun)()
         assert r.op.output_types[0] == OutputType.series
         assert r.shape == (len(series1),)
+        assert r.index_value.key == ms1.index_value.key
 
 
 def test_groupby_fill():
maxframe/dataframe/indexing/add_prefix_suffix.py CHANGED
@@ -51,7 +51,7 @@ def _get_prefix_suffix_docs(is_prefix: bool):
     Examples
     --------
     >>> import maxframe.dataframe as md
-    >>> s = md.Series([1, 2, 3, 4])
+    >>> s = md.Series([1, 2, 3, 4])
     >>> s.execute()
     0    1
     1    2
maxframe/dataframe/indexing/rename.py CHANGED
@@ -17,7 +17,7 @@ import warnings
 from ... import opcodes
 from ...core import get_output_types
 from ...serialization.serializables import AnyField, StringField
-from ..core import SERIES_TYPE
+from ..core import INDEX_TYPE, SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_df, build_series, parse_index, validate_axis
 
@@ -73,6 +73,8 @@ class DataFrameRename(DataFrameOperator, DataFrameOperatorMixin):
             params["index_value"] = parse_index(new_index)
         if df.ndim == 1:
             params["name"] = new_df.name
+        if isinstance(df, INDEX_TYPE):
+            params["names"] = new_df.names
         return self.new_tileable([df], **params)
 
 
@@ -303,11 +305,6 @@ def series_rename(
     1    2
     2    3
     Name: my_name, dtype: int64
-    >>> s.rename(lambda x: x ** 2).execute()  # function, changes labels.execute()
-    0    1
-    1    2
-    4    3
-    dtype: int64
     >>> s.rename({1: 3, 2: 5}).execute()  # mapping, changes labels.execute()
     0    1
     3    2
@@ -410,37 +407,6 @@ def index_set_names(index, names, level=None, inplace=False):
     See Also
     --------
     Index.rename : Able to set new names without level.
-
-    Examples
-    --------
-    >>> import maxframe.dataframe as md
-    >>> idx = md.Index([1, 2, 3, 4])
-    >>> idx.execute()
-    Int64Index([1, 2, 3, 4], dtype='int64')
-    >>> idx.set_names('quarter').execute()
-    Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
-
-    >>> idx = md.MultiIndex.from_product([['python', 'cobra'],
-    ...                                   [2018, 2019]])
-    >>> idx.execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               )
-    >>> idx.set_names(['kind', 'year'], inplace=True)
-    >>> idx.execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               names=['kind', 'year'])
-    >>> idx.set_names('species', level=0).execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               names=['species', 'year'])
     """
     op = DataFrameRename(
         index_mapper=names, level=level, output_types=get_output_types(index)
maxframe/dataframe/indexing/sample.py CHANGED
@@ -195,7 +195,6 @@ def sample(
             num_legs  num_wings  num_specimen_seen
     falcon         2          2                 10
     fish           0          0                  8
-
     """
     axis = validate_axis(axis or 0, df_or_series)
     if axis == 1: