maxframe 2.2.0__cp310-cp310-macosx_10_9_universal2.whl → 2.3.0rc1__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/codegen/core.py +3 -2
- maxframe/codegen/spe/dataframe/merge.py +4 -0
- maxframe/codegen/spe/dataframe/misc.py +2 -0
- maxframe/codegen/spe/dataframe/reduction.py +18 -0
- maxframe/codegen/spe/dataframe/sort.py +9 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/config/config.py +3 -0
- maxframe/conftest.py +10 -0
- maxframe/core/base.py +2 -1
- maxframe/core/entity/tileables.py +2 -0
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/entity.py +7 -1
- maxframe/core/mode.py +6 -1
- maxframe/dataframe/__init__.py +2 -2
- maxframe/dataframe/arithmetic/__init__.py +4 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/core.py +98 -106
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/read_csv.py +19 -11
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datastore/__init__.py +6 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_odps.py +6 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/groupby/__init__.py +5 -1
- maxframe/dataframe/groupby/aggregation.py +10 -6
- maxframe/dataframe/groupby/apply_chunk.py +1 -3
- maxframe/dataframe/groupby/core.py +20 -4
- maxframe/dataframe/indexing/__init__.py +2 -1
- maxframe/dataframe/indexing/insert.py +45 -17
- maxframe/dataframe/merge/__init__.py +3 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/misc/__init__.py +14 -3
- maxframe/dataframe/misc/check_unique.py +41 -10
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/map.py +31 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +35 -1
- maxframe/dataframe/missing/checkna.py +3 -2
- maxframe/dataframe/reduction/__init__.py +10 -5
- maxframe/dataframe/reduction/aggregation.py +6 -6
- maxframe/dataframe/reduction/argmax.py +7 -4
- maxframe/dataframe/reduction/argmin.py +7 -4
- maxframe/dataframe/reduction/core.py +18 -9
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +10 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/sort/__init__.py +9 -2
- maxframe/dataframe/sort/argsort.py +7 -1
- maxframe/dataframe/sort/core.py +1 -1
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/utils.py +30 -26
- maxframe/learn/contrib/llm/core.py +16 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +31 -7
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +2 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +1 -0
- maxframe/learn/utils/extmath.py +42 -9
- maxframe/learn/utils/odpsio.py +80 -11
- maxframe/lib/filesystem/_oss_lib/common.py +2 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/opcodes.py +9 -1
- maxframe/remote/core.py +4 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/tests/test_serial.py +2 -2
- maxframe/tensor/arithmetic/__init__.py +1 -1
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
- maxframe/tensor/core.py +3 -0
- maxframe/tensor/misc/copyto.py +1 -1
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +8 -5
- maxframe/udf.py +103 -7
- maxframe/utils.py +61 -8
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_session.py +24 -0
- maxframe/dataframe/arrays.py +0 -864
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/dataframe/misc/infer_dtypes.py
ADDED
@@ -0,0 +1,251 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ... import opcodes
+from ...serialization.serializables import AnyField, StringField
+from ..core import DATAFRAME_TYPE, SERIES_TYPE
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+
+
+class DataFrameInferDtypes(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.DATAFRAME_INFER_DTYPES
+
+    infer_method = StringField("infer_method")
+    infer_kwargs = AnyField("infer_kwargs")
+
+    infer_stage = StringField("infer_stage", default=None)
+
+    def __init__(self, output_types=None, **kw):
+        super().__init__(_output_types=output_types, **kw)
+
+    def __call__(self, df):
+        if isinstance(df, DATAFRAME_TYPE):
+            return self.new_dataframe(
+                [df],
+                shape=df.shape,
+                dtypes=None,
+                index_value=df.index_value,
+                columns_value=df.columns_value,
+            )
+        else:
+            assert isinstance(df, SERIES_TYPE)
+            return self.new_series(
+                [df],
+                shape=df.shape,
+                dtype=None,
+                name=df.name,
+                index_value=df.index_value,
+            )
+
+
+def convert_dtypes(
+    df_or_series,
+    infer_objects=True,
+    convert_string=True,
+    convert_integer=True,
+    convert_boolean=True,
+    convert_floating=True,
+    dtype_backend="numpy",
+):
+    """
+    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
+
+    Parameters
+    ----------
+    infer_objects : bool, default True
+        Whether object dtypes should be converted to the best possible types.
+    convert_string : bool, default True
+        Whether object dtypes should be converted to ``StringDtype()``.
+    convert_integer : bool, default True
+        Whether, if possible, conversion can be done to integer extension types.
+    convert_boolean : bool, defaults True
+        Whether object dtypes should be converted to ``BooleanDtypes()``.
+    convert_floating : bool, defaults True
+        Whether, if possible, conversion can be done to floating extension types.
+        If `convert_integer` is also True, preference will be give to integer
+        dtypes if the floats can be faithfully casted to integers.
+
+    Returns
+    -------
+    Series or DataFrame
+        Copy of input object with new dtype.
+
+    See Also
+    --------
+    infer_objects : Infer dtypes of objects.
+    to_datetime : Convert argument to datetime.
+    to_timedelta : Convert argument to timedelta.
+    to_numeric : Convert argument to a numeric type.
+
+    Notes
+    -----
+    By default, ``convert_dtypes`` will attempt to convert a Series (or each
+    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
+    ``convert_string``, ``convert_integer``, ``convert_boolean`` and
+    ``convert_boolean``, it is possible to turn off individual conversions
+    to ``StringDtype``, the integer extension types, ``BooleanDtype``
+    or floating extension types, respectively.
+
+    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
+    rules as during normal Series/DataFrame construction. Then, if possible,
+    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
+    or floating extension type, otherwise leave as ``object``.
+
+    If the dtype is integer, convert to an appropriate integer extension type.
+
+    If the dtype is numeric, and consists of all integers, convert to an
+    appropriate integer extension type. Otherwise, convert to an
+    appropriate floating extension type.
+
+    .. versionchanged:: 1.2
+        Starting with pandas 1.2, this method also converts float columns
+        to the nullable floating extension type.
+
+    In the future, as new dtypes are added that support ``pd.NA``, the results
+    of this method will change to support those new dtypes.
+
+    Examples
+    --------
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame(
+    ...     {
+    ...         "a": md.Series([1, 2, 3], dtype=mt.dtype("int32")),
+    ...         "b": md.Series(["x", "y", "z"], dtype=mt.dtype("O")),
+    ...         "c": md.Series([True, False, mt.nan], dtype=mt.dtype("O")),
+    ...         "d": md.Series(["h", "i", mt.nan], dtype=mt.dtype("O")),
+    ...         "e": md.Series([10, mt.nan, 20], dtype=mt.dtype("float")),
+    ...         "f": md.Series([mt.nan, 100.5, 200], dtype=mt.dtype("float")),
+    ...     }
+    ... )
+
+    Start with a DataFrame with default dtypes.
+
+    >>> df.execute()
+       a  b      c    d     e      f
+    0  1  x   True    h  10.0    NaN
+    1  2  y  False    i   NaN  100.5
+    2  3  z    NaN  NaN  20.0  200.0
+
+    >>> df.dtypes.execute()
+    a      int32
+    b     object
+    c     object
+    d     object
+    e    float64
+    f    float64
+    dtype: object
+
+    Convert the DataFrame to use best possible dtypes.
+
+    >>> dfn = df.convert_dtypes()
+    >>> dfn.execute()
+       a  b      c     d     e      f
+    0  1  x   True     h    10   <NA>
+    1  2  y  False     i  <NA>  100.5
+    2  3  z   <NA>  <NA>    20  200.0
+
+    >>> dfn.dtypes.execute()
+    a      Int32
+    b     string
+    c    boolean
+    d     string
+    e      Int64
+    f    Float64
+    dtype: object
+
+    Start with a Series of strings and missing data represented by ``np.nan``.
+
+    >>> s = md.Series(["a", "b", mt.nan])
+    >>> s.execute()
+    0      a
+    1      b
+    2    NaN
+    dtype: object
+
+    Obtain a Series with dtype ``StringDtype``.
+
+    >>> s.convert_dtypes().execute()
+    0       a
+    1       b
+    2    <NA>
+    dtype: string
+    """
+    dtype_backend = "numpy" if dtype_backend == "numpy_nullable" else dtype_backend
+    op = DataFrameInferDtypes(
+        infer_method="convert_dtypes",
+        infer_kwargs=dict(
+            infer_objects=infer_objects,
+            convert_string=convert_string,
+            convert_integer=convert_integer,
+            convert_boolean=convert_boolean,
+            convert_floating=convert_floating,
+            dtype_backend=dtype_backend,
+        ),
+    )
+    return op(df_or_series)
+
+
+def infer_objects(df_or_series, copy=True):
+    """
+    Attempt to infer better dtypes for object columns.
+
+    Attempts soft conversion of object-dtyped
+    columns, leaving non-object and unconvertible
+    columns unchanged. The inference rules are the
+    same as during normal Series/DataFrame construction.
+
+    Returns
+    -------
+    converted : same type as input object
+
+    See Also
+    --------
+    to_datetime : Convert argument to datetime.
+    to_timedelta : Convert argument to timedelta.
+    to_numeric : Convert argument to numeric type.
+    convert_dtypes : Convert argument to best possible dtype.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({"A": ["a", 1, 2, 3]})
+    >>> df = df.iloc[1:]
+    >>> df.execute()
+       A
+    1  1
+    2  2
+    3  3
+
+    >>> df.dtypes.execute()
+    A    object
+    dtype: object
+
+    >>> df.infer_objects().dtypes.execute()
+    A    int64
+    dtype: object
+    """
+    if (isinstance(df_or_series, SERIES_TYPE) and df_or_series.dtype != "O") or (
+        isinstance(df_or_series, DATAFRAME_TYPE)
+        and all(dt != "O" for dt in df_or_series.dtypes)
+    ):
+        # no objects to cast
+        return df_or_series
+
+    _ = copy  # in MaxFrame data are immutable, thus ignore the parameter
+    op = DataFrameInferDtypes(
+        infer_method="infer_objects",
+        infer_kwargs={},
+    )
+    return op(df_or_series)
maxframe/dataframe/misc/map.py
CHANGED
@@ -21,8 +21,8 @@ import pandas as pd
 from ... import opcodes
 from ...core import EntityData, OutputType
 from ...serialization.serializables import AnyField, KeyField, StringField
-from ...udf import BuiltinFunction, MarkedFunction
-from ...utils import quiet_stdio
+from ...udf import BuiltinFunction, MarkedFunction, ODPSFunction
+from ...utils import make_dtype, quiet_stdio
 from ..core import SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_series, copy_func_scheduling_hints
@@ -40,6 +40,7 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
         if not self.output_types:
             self.output_types = [OutputType.series]
         if hasattr(self, "arg"):
+            self.arg = ODPSFunction.wrap(self.arg)
             copy_func_scheduling_hints(self.arg, self)

     @classmethod
@@ -55,25 +56,34 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
         ) and not isinstance(self.arg, BuiltinFunction)

     def __call__(self, series, dtype, skip_infer=False):
-        if dtype is
-
-
+        if dtype is not None:
+            dtype = make_dtype(dtype)
+        else:
+            # obtain dtype from existing hints
+            if isinstance(self.arg, ODPSFunction):
+                if self.arg.result_dtype is not None:
+                    dtype = self.arg.result_dtype
+            elif callable(self.arg):
                 # arg is a function, try to inspect the signature
                 sig = inspect.signature(self.arg)
                 return_type = sig.return_annotation
                 if return_type is not inspect._empty:
-
-
-
-
-
-
-
-
-
-
-
-
+                    dtype = np.dtype(return_type)
+
+        err_prefix = None
+        if dtype is None and not skip_infer:
+            inferred_dtype = None
+            if callable(self.arg):
+                try:
+                    with quiet_stdio():
+                        # try to infer dtype by calling the function
+                        inferred_dtype = (
+                            build_series(series)
+                            .map(self.arg, na_action=self.na_action)
+                            .dtype
+                        )
+                except:  # noqa: E722  # nosec
+                    pass
             else:
                 if isinstance(self.arg, MutableMapping):
                     inferred_dtype = pd.Series(self.arg).dtype
@@ -86,13 +96,16 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
                     # but for int, due to the nan which may occur,
                     # we cannot infer the dtype
                     dtype = inferred_dtype
+                else:
+                    err_prefix = "int type may not be exact"
             else:
                 dtype = inferred_dtype

         if dtype is None:
             if not skip_infer:
+                err_prefix = err_prefix or "cannot infer dtype"
                 raise ValueError(
-                    "
+                    f"{err_prefix}, it needs to be specified manually for `map`"
                 )
         else:
             dtype = np.int64 if dtype is int else dtype
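
To summarise the resolution order implemented in __call__ above: an explicit dtype wins, then a result dtype carried by an ODPSFunction, then a return-type annotation, and finally a trial run on a mock series. The sketch below mirrors the new test_map_with_functions test further down in this diff; it is illustrative only, and the ODPSFunction constructor arguments are taken from that test rather than from documented API.

import numpy as np
import maxframe.dataframe as md
from maxframe.udf import ODPSFunction

s = md.Series([1, 2, 3])

# explicit dtype, skipping inference entirely
s.map(lambda v: v, dtype="float64", skip_infer=True)

# an ODPS UDF carrying its own result dtype
s.map(ODPSFunction("test_odps_udf", dtype=np.float64))

# a return-type annotation is converted via np.dtype(...)
def as_int(v) -> int:
    return v

s.map(as_int)             # dtype resolves to int64

# otherwise the callable is trial-run on a mock series to infer the dtype
s.map(lambda v: v * 1.0)  # dtype resolves to float64
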
maxframe/dataframe/misc/repeat.py
ADDED
@@ -0,0 +1,159 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+import numpy as np
+from pandas.api.types import is_list_like
+
+from ... import opcodes
+from ...core import ENTITY_TYPE, EntityData, get_output_types
+from ...serialization.serializables import AnyField
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import parse_index, validate_axis
+
+
+class DataFrameRepeat(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.REPEAT
+
+    repeats = AnyField("repeats", default=None)
+
+    def __init__(self, output_types=None, **kw):
+        super().__init__(_output_types=output_types, **kw)
+
+    @classmethod
+    def _set_inputs(cls, op: "DataFrameRepeat", inputs: List[EntityData]):
+        super()._set_inputs(op, inputs)
+        if isinstance(op.repeats, ENTITY_TYPE):
+            op.repeats = inputs[1]
+
+    def __call__(self, obj, repeats):
+        self._output_types = get_output_types(obj)
+        test_index = obj.index_value.to_pandas()[:0]
+
+        params = obj.params
+        params["index_value"] = parse_index(test_index, obj, type(self), self.repeats)
+        params["shape"] = (np.nan,)
+
+        inputs = [obj]
+        if isinstance(repeats, ENTITY_TYPE):
+            inputs.append(repeats)
+        return self.new_tileable(inputs, **params)
+
+
+def _repeat(obj, repeats, axis=None):
+    from ...tensor.datasource import tensor
+
+    axis = validate_axis(axis or 0, obj)
+    if is_list_like(repeats):
+        repeats = tensor(repeats)
+    op = DataFrameRepeat(repeats=repeats, axis=axis)
+    return op(obj, repeats)
+
+
+def series_repeat(obj, repeats, axis=None):
+    """
+    Repeat elements of a Series.
+
+    Returns a new Series where each element of the current Series
+    is repeated consecutively a given number of times.
+
+    Parameters
+    ----------
+    repeats : int or array of ints
+        The number of repetitions for each element. This should be a
+        non-negative integer. Repeating 0 times will return an empty
+        Series.
+    axis : None
+        Must be ``None``. Has no effect but is accepted for compatibility
+        with numpy.
+
+    Returns
+    -------
+    Series
+        Newly created Series with repeated elements.
+
+    See Also
+    --------
+    Index.repeat : Equivalent function for Index.
+    numpy.repeat : Similar method for :class:`numpy.ndarray`.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series(['a', 'b', 'c'])
+    >>> s.execute()
+    0    a
+    1    b
+    2    c
+    dtype: object
+    >>> s.repeat(2).execute()
+    0    a
+    0    a
+    1    b
+    1    b
+    2    c
+    2    c
+    dtype: object
+    >>> s.repeat([1, 2, 3]).execute()
+    0    a
+    1    b
+    1    b
+    2    c
+    2    c
+    2    c
+    dtype: object
+    """
+    return _repeat(obj, repeats, axis=axis)
+
+
+def index_repeat(obj, repeats, axis=None):
+    """
+    Repeat elements of an Index.
+
+    Returns a new Index where each element of the current Index
+    is repeated consecutively a given number of times.
+
+    Parameters
+    ----------
+    repeats : int or array of ints
+        The number of repetitions for each element. This should be a
+        non-negative integer. Repeating 0 times will return an empty
+        Index.
+    axis : None
+        Must be ``None``. Has no effect but is accepted for compatibility
+        with numpy.
+
+    Returns
+    -------
+    repeated_index : Index
+        Newly created Index with repeated elements.
+
+    See Also
+    --------
+    Series.repeat : Equivalent function for Series.
+    numpy.repeat : Similar method for :class:`numpy.ndarray`.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> idx = md.Index(['a', 'b', 'c'])
+    >>> idx.execute()
+    Index(['a', 'b', 'c'], dtype='object')
+    >>> idx.repeat(2).execute()
+    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
+    >>> idx.repeat([1, 2, 3]).execute()
+    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
+    """
+    return _repeat(obj, repeats, axis=axis)
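
A brief usage sketch for the repeat support added above, following the docstrings; note that _repeat wraps list-like repeats into a tensor, so tensor-valued counts are accepted as well. Illustrative only, not verified against this release.

import maxframe.dataframe as md
import maxframe.tensor as mt

s = md.Series(["a", "b", "c"])
s.repeat(2)                     # scalar count
s.repeat([1, 2, 3])             # list-like counts, wrapped into a tensor internally
s.repeat(mt.tensor([1, 2, 3]))  # an existing tensor of counts is also accepted

idx = md.Index(["a", "b", "c"])
idx.repeat(2)                   # equivalent Index variant
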
maxframe/dataframe/misc/tests/test_misc.py
CHANGED
@@ -22,7 +22,7 @@ from .... import opcodes
 from ....core import OutputType
 from ....dataframe import DataFrame
 from ....tensor.core import TENSOR_TYPE
-from ....udf import with_running_options
+from ....udf import ODPSFunction, with_running_options
 from ... import eval as maxframe_eval
 from ... import get_dummies, to_numeric
 from ...arithmetic import DataFrameGreater, DataFrameLess
@@ -613,3 +613,37 @@ def test_pivot_table():
     t = df.pivot_table(index=["A", "B"], columns="C", aggfunc="sum")
     assert isinstance(t.op, DataFramePivotTable)
     assert t.shape == (np.nan, np.nan)
+
+
+def test_map_with_functions():
+    raw = pd.Series([1, 2, 3], name="s_name")
+    series = from_pandas_series(raw, chunk_size=2)
+
+    # inferred type may not be exact
+    def fn1(val):
+        return val
+
+    with pytest.raises(ValueError, match="int type"):
+        series.map(fn1)
+    mapped = series.map(fn1, dtype="float64", skip_infer=True)
+    assert mapped.dtype == np.dtype("float64")
+
+    # test when type infer is valid
+    def fn2(val):
+        return val * 1.0
+
+    mapped = series.map(fn2)
+    assert mapped.dtype == np.dtype("float64")
+
+    # test function with type annotations
+    def fn3(val) -> int:
+        return val
+
+    mapped = series.map(fn3)
+    assert mapped.dtype == np.dtype("int64")
+
+    # test odps function
+    odps_func = ODPSFunction("test_odps_udf", dtype=np.float64)
+    mapped = series.map(odps_func)
+    assert isinstance(mapped.op.arg, ODPSFunction)
+    assert mapped.dtype == np.dtype("float64")
maxframe/dataframe/missing/checkna.py
CHANGED
@@ -22,6 +22,7 @@ from ... import tensor as mt
 from ...core import ENTITY_TYPE, OutputType
 from ...serialization.serializables import BoolField
 from ...tensor.core import TENSOR_TYPE
+from ...utils import get_pd_option
 from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE, MultiIndex
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -138,7 +139,7 @@ def isna(obj):
     2     True
     dtype: bool
     """
-    use_inf_as_na =
+    use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
     if isinstance(obj, MultiIndex):
         raise NotImplementedError("isna is not defined for MultiIndex")
     elif isinstance(obj, ENTITY_TYPE):
@@ -213,7 +214,7 @@ def notna(obj):
     2    False
     dtype: bool
     """
-    use_inf_as_na =
+    use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
    if isinstance(obj, MultiIndex):
         raise NotImplementedError("isna is not defined for MultiIndex")
     elif isinstance(obj, ENTITY_TYPE):
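
The two hunks above route the use_inf_as_na lookup through the new get_pd_option helper, which falls back to the given default when the pandas option is unavailable (the option is deprecated or removed in newer pandas). A hedged sketch of the intended effect, not verified against this release:

import pandas as pd
import maxframe.dataframe as md

s = md.Series([1.0, float("inf"), None])
md.isna(s).execute()   # only None/NaN are treated as missing by default

# on pandas versions that still expose the option, inf can also count as NA
try:
    with pd.option_context("mode.use_inf_as_na", True):
        md.isna(s).execute()
except Exception:
    pass  # option gone in newer pandas; get_pd_option then returns the default (False)
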
maxframe/dataframe/reduction/__init__.py
CHANGED
@@ -17,7 +17,7 @@ from .all import DataFrameAll
 from .any import DataFrameAny
 from .argmax import DataFrameArgMax
 from .argmin import DataFrameArgMin
-from .core import CustomReduction
+from .core import CustomReduction, NamedAgg
 from .count import DataFrameCount
 from .cummax import DataFrameCummax
 from .cummin import DataFrameCummin
@@ -31,6 +31,7 @@ from .max import DataFrameMax
 from .mean import DataFrameMean
 from .median import DataFrameMedian
 from .min import DataFrameMin
+from .mode import DataFrameMode
 from .nunique import DataFrameNunique
 from .prod import DataFrameProd
 from .reduction_size import DataFrameSize
@@ -47,8 +48,8 @@ def _install():
     from .aggregation import aggregate
     from .all import all_dataframe, all_index, all_series
     from .any import any_dataframe, any_index, any_series
-    from .argmax import
-    from .argmin import
+    from .argmax import argmax_series_index
+    from .argmin import argmin_series_index
     from .count import count_dataframe, count_series
     from .cov import cov_dataframe, cov_series
     from .cummax import cummax
@@ -62,6 +63,7 @@ def _install():
     from .mean import mean_dataframe, mean_series
     from .median import median_dataframe, median_series
     from .min import min_dataframe, min_index, min_series
+    from .mode import mode_dataframe, mode_series
     from .nunique import nunique_dataframe, nunique_series
     from .prod import prod_dataframe, prod_series
     from .reduction_size import size_dataframe, size_series
@@ -76,8 +78,8 @@ def _install():
         ("aggregate", aggregate, aggregate),
         ("all", all_series, all_dataframe),
         ("any", any_series, any_dataframe),
-        ("argmax",
-        ("argmin",
+        ("argmax", argmax_series_index, None),
+        ("argmin", argmin_series_index, None),
         ("count", count_series, count_dataframe),
         ("cov", cov_series, cov_dataframe),
         ("cummax", cummax, cummax),
@@ -92,6 +94,7 @@ def _install():
         ("mean", mean_series, mean_dataframe),
         ("median", median_series, median_dataframe),
         ("min", min_series, min_dataframe),
+        ("mode", mode_series, mode_dataframe),
         ("nunique", nunique_series, nunique_dataframe),
         ("prod", prod_series, prod_dataframe),
         ("product", prod_series, prod_dataframe),
@@ -118,6 +121,8 @@ def _install():
         setattr(t, "any", any_index)
         setattr(t, "min", min_index)
         setattr(t, "max", max_index)
+        setattr(t, "argmin", argmin_series_index)
+        setattr(t, "argmax", argmax_series_index)


 _install()
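
The registrations above expose mode on Series and DataFrame, and argmax/argmin on Series and Index objects (the DataFrame slot for argmax/argmin is left as None). A minimal, unverified usage sketch, expected to follow pandas semantics:

import maxframe.dataframe as md

s = md.Series([1, 2, 2, 3])
s.mode().execute()      # most frequent value(s)
s.argmax().execute()    # positional index of the maximum
s.argmin().execute()    # positional index of the minimum

df = md.DataFrame({"a": [1, 1, 2], "b": [3, 3, 3]})
df.mode().execute()     # column-wise modes

idx = md.Index([1, 5, 3])
idx.argmax().execute()  # argmin/argmax are now also attached to Index types
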
maxframe/dataframe/reduction/aggregation.py
CHANGED
@@ -38,7 +38,7 @@ from ...serialization.serializables import (
 )
 from ...typing_ import TileableType
 from ...udf import BuiltinFunction
-from ...utils import lazy_import, pd_release_version
+from ...utils import get_pd_option, lazy_import, pd_release_version
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_df, build_empty_df, build_series, parse_index, validate_axis
 from .core import (
@@ -92,8 +92,8 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
     _op_type_ = opcodes.AGGREGATE

     raw_func = AnyField("raw_func")
-    raw_func_kw = DictField("raw_func_kw")
-    func = AnyField("func")
+    raw_func_kw = DictField("raw_func_kw", default=None)
+    func = AnyField("func", default=None)
     func_rename = ListField("func_rename", default=None)
     axis = AnyField("axis", default=0)
     numeric_only = BoolField("numeric_only", default=None)
@@ -199,7 +199,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
         normalize_reduction_funcs(self, ndim=df.ndim)
         compile_reduction_funcs(self, df)
         if output_type is None or dtypes is None:
-            with enter_mode(kernel=False, build=False):
+            with enter_mode(kernel=False, build=False, mock=True):
                 dtypes, index = self._calc_result_shape(df)
         else:
             self.output_types = [output_type]
@@ -231,7 +231,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
             return self.new_series(
                 [df],
                 shape=new_shape,
-                dtype=dtypes[0],
+                dtype=dtypes.iloc[0],
                 name=dtypes.index[0],
                 index_value=new_index,
             )
@@ -456,7 +456,7 @@ def aggregate(df, func=None, axis=0, **kw):
     min    1
     """
     axis = validate_axis(axis, df)
-    use_inf_as_na = kw.pop("_use_inf_as_na",
+    use_inf_as_na = kw.pop("_use_inf_as_na", get_pd_option("mode.use_inf_as_na", False))
     if func == "unique":
         # workaround for direct call of unique function which
         # returns a tensor directly