PyPI - maxframe - Versions diffs - 2.2.0__cp312-cp312-win_amd64.whl → 2.3.0rc1__cp312-cp312-win_amd64.whl - Mend

maxframe 2.2.0__cp312-cp312-win_amd64.whl → 2.3.0rc1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (114) hide show

maxframe/_utils.cp312-win_amd64.pyd +0 -0
maxframe/codegen/core.py +3 -2
maxframe/codegen/spe/dataframe/merge.py +4 -0
maxframe/codegen/spe/dataframe/misc.py +2 -0
maxframe/codegen/spe/dataframe/reduction.py +18 -0
maxframe/codegen/spe/dataframe/sort.py +9 -1
maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
maxframe/codegen/spe/dataframe/tseries.py +9 -0
maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
maxframe/codegen/spe/tensor/datasource.py +1 -0
maxframe/config/config.py +3 -0
maxframe/conftest.py +10 -0
maxframe/core/base.py +2 -1
maxframe/core/entity/tileables.py +2 -0
maxframe/core/graph/core.cp312-win_amd64.pyd +0 -0
maxframe/core/graph/entity.py +7 -1
maxframe/core/mode.py +6 -1
maxframe/dataframe/__init__.py +2 -2
maxframe/dataframe/arithmetic/__init__.py +4 -0
maxframe/dataframe/arithmetic/maximum.py +33 -0
maxframe/dataframe/arithmetic/minimum.py +33 -0
maxframe/dataframe/core.py +98 -106
maxframe/dataframe/datasource/core.py +6 -0
maxframe/dataframe/datasource/direct.py +57 -0
maxframe/dataframe/datasource/read_csv.py +19 -11
maxframe/dataframe/datasource/read_odps_query.py +29 -6
maxframe/dataframe/datasource/read_odps_table.py +32 -10
maxframe/dataframe/datasource/read_parquet.py +38 -39
maxframe/dataframe/datastore/__init__.py +6 -0
maxframe/dataframe/datastore/direct.py +268 -0
maxframe/dataframe/datastore/to_odps.py +6 -0
maxframe/dataframe/extensions/flatjson.py +2 -1
maxframe/dataframe/groupby/__init__.py +5 -1
maxframe/dataframe/groupby/aggregation.py +10 -6
maxframe/dataframe/groupby/apply_chunk.py +1 -3
maxframe/dataframe/groupby/core.py +20 -4
maxframe/dataframe/indexing/__init__.py +2 -1
maxframe/dataframe/indexing/insert.py +45 -17
maxframe/dataframe/merge/__init__.py +3 -0
maxframe/dataframe/merge/combine.py +244 -0
maxframe/dataframe/misc/__init__.py +14 -3
maxframe/dataframe/misc/check_unique.py +41 -10
maxframe/dataframe/misc/drop.py +31 -0
maxframe/dataframe/misc/infer_dtypes.py +251 -0
maxframe/dataframe/misc/map.py +31 -18
maxframe/dataframe/misc/repeat.py +159 -0
maxframe/dataframe/misc/tests/test_misc.py +35 -1
maxframe/dataframe/missing/checkna.py +3 -2
maxframe/dataframe/reduction/__init__.py +10 -5
maxframe/dataframe/reduction/aggregation.py +6 -6
maxframe/dataframe/reduction/argmax.py +7 -4
maxframe/dataframe/reduction/argmin.py +7 -4
maxframe/dataframe/reduction/core.py +18 -9
maxframe/dataframe/reduction/mode.py +144 -0
maxframe/dataframe/reduction/nunique.py +10 -3
maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
maxframe/dataframe/sort/__init__.py +9 -2
maxframe/dataframe/sort/argsort.py +7 -1
maxframe/dataframe/sort/core.py +1 -1
maxframe/dataframe/sort/rank.py +147 -0
maxframe/dataframe/tseries/__init__.py +19 -0
maxframe/dataframe/tseries/at_time.py +61 -0
maxframe/dataframe/tseries/between_time.py +122 -0
maxframe/dataframe/utils.py +30 -26
maxframe/learn/contrib/llm/core.py +16 -7
maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
maxframe/learn/contrib/llm/deploy/config.py +221 -0
maxframe/learn/contrib/llm/deploy/core.py +247 -0
maxframe/learn/contrib/llm/deploy/framework.py +35 -0
maxframe/learn/contrib/llm/deploy/loader.py +360 -0
maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
maxframe/learn/contrib/llm/models/__init__.py +1 -0
maxframe/learn/contrib/llm/models/dashscope.py +12 -6
maxframe/learn/contrib/llm/models/managed.py +76 -11
maxframe/learn/contrib/llm/models/openai.py +72 -0
maxframe/learn/contrib/llm/tests/__init__.py +13 -0
maxframe/learn/contrib/llm/tests/test_core.py +34 -0
maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
maxframe/learn/contrib/llm/text.py +348 -42
maxframe/learn/contrib/models.py +4 -1
maxframe/learn/contrib/xgboost/classifier.py +2 -0
maxframe/learn/contrib/xgboost/core.py +31 -7
maxframe/learn/contrib/xgboost/predict.py +4 -2
maxframe/learn/contrib/xgboost/regressor.py +5 -0
maxframe/learn/contrib/xgboost/train.py +2 -0
maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
maxframe/learn/utils/__init__.py +1 -0
maxframe/learn/utils/extmath.py +42 -9
maxframe/learn/utils/odpsio.py +80 -11
maxframe/lib/filesystem/_oss_lib/common.py +2 -0
maxframe/lib/mmh3.cp312-win_amd64.pyd +0 -0
maxframe/opcodes.py +9 -1
maxframe/remote/core.py +4 -0
maxframe/serialization/core.cp312-win_amd64.pyd +0 -0
maxframe/serialization/tests/test_serial.py +2 -2
maxframe/tensor/arithmetic/__init__.py +1 -1
maxframe/tensor/arithmetic/core.py +2 -2
maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
maxframe/tensor/core.py +3 -0
maxframe/tensor/misc/copyto.py +1 -1
maxframe/tests/test_udf.py +61 -0
maxframe/tests/test_utils.py +8 -5
maxframe/udf.py +103 -7
maxframe/utils.py +61 -8
{maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
{maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
maxframe_client/session/task.py +8 -1
maxframe_client/tests/test_session.py +24 -0
maxframe/dataframe/arrays.py +0 -864
{maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
{maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0

maxframe/dataframe/reduction/argmax.py CHANGED Viewed

@@ -40,7 +40,7 @@ class DataFrameArgMax(DataFrameReduction, DataFrameReductionMixin):
         return ReductionCallable(func_name=func_name, kwargs=kw)
-def argmax_series(series, axis=0, skipna=True):
+def argmax_series_index(series_or_index, axis=0, skipna=True, *args, **kwargs):
     """
     Return int position of the largest value in the Series.
@@ -65,7 +65,7 @@ def argmax_series(series, axis=0, skipna=True):
     --------
     Series.argmin : Return position of the minimum value.
     Series.argmax : Return position of the maximum value.
-    numpy.ndarray.argmax : Equivalent method for numpy arrays.
+    maxframe.tensor.argmax : Equivalent method for tensors.
     Series.idxmax : Return index label of the maximum values.
     Series.idxmin : Return index label of the minimum values.
@@ -92,9 +92,12 @@ def argmax_series(series, axis=0, skipna=True):
     the minimum cereal calories is the first element,
     since series is zero-indexed.
     """
-    validate_axis(axis, series)
+    # extra positional and keyword arguments are not implemented; ignore them
+    _ = args, kwargs
+    validate_axis(axis, series_or_index)
     op = DataFrameArgMax(
         dropna=skipna,
         output_types=[OutputType.scalar],
     )
-    return op(series)
+    return op(series_or_index)

maxframe/dataframe/reduction/argmin.py CHANGED Viewed

@@ -40,7 +40,7 @@ class DataFrameArgMin(DataFrameReduction, DataFrameReductionMixin):
         return ReductionCallable(func_name=func_name, kwargs=kw)
-def argmin_series(series, axis=0, skipna=True):
+def argmin_series_index(series_or_index, axis=0, skipna=True, *args, **kwargs):
     """
     Return int position of the smallest value in the Series.
@@ -65,7 +65,7 @@ def argmin_series(series, axis=0, skipna=True):
     --------
     Series.argmin : Return position of the minimum value.
     Series.argmax : Return position of the maximum value.
-    numpy.ndarray.argmin : Equivalent method for numpy arrays.
+    maxframe.tensor.argmin : Equivalent method for tensors.
     Series.idxmax : Return index label of the maximum values.
     Series.idxmin : Return index label of the minimum values.
@@ -92,9 +92,12 @@ def argmin_series(series, axis=0, skipna=True):
     the minimum cereal calories is the first element,
     since series is zero-indexed.
     """
-    validate_axis(axis, series)
+    # extra positional and keyword arguments are not implemented; ignore them
+    _ = args, kwargs
+    validate_axis(axis, series_or_index)
     op = DataFrameArgMin(
         dropna=skipna,
         output_types=[OutputType.scalar],
     )
-    return op(series)
+    return op(series_or_index)

maxframe/dataframe/reduction/core.py CHANGED Viewed

@@ -14,7 +14,7 @@
 import functools
 import inspect
-from collections import OrderedDict
+from collections import OrderedDict, namedtuple
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple
 import msgpack
@@ -32,7 +32,7 @@ from ...serialization.serializables import (
     StringField,
 )
 from ...typing_ import TileableType
-from ...utils import get_item_if_scalar, pd_release_version, tokenize
+from ...utils import get_item_if_scalar, get_pd_option, pd_release_version, tokenize
 from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
 from ..utils import (
     build_df,
@@ -52,6 +52,8 @@ _reduce_bool_as_object = pd_release_version[:2] != (1, 2)
 _idx_reduction_without_numeric_only = pd_release_version[:2] < (1, 5)
+NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
 class DataFrameReduction(DataFrameOperator):
     _legacy_name = "DataFrameReductionOperator"  # since v2.2.0
@@ -70,7 +72,7 @@ class DataFrameReduction(DataFrameOperator):
     def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
         kw["use_inf_as_na"] = kw.pop(
-            "use_inf_as_na", pd.get_option("mode.use_inf_as_na")
+            "use_inf_as_na", get_pd_option("mode.use_inf_as_na", False)
         )
         super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
@@ -104,7 +106,7 @@ class DataFrameCumReduction(DataFrameOperator):
     def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
         kw["use_inf_as_na"] = kw.pop(
-            "use_inf_as_na", pd.get_option("mode.use_inf_as_na")
+            "use_inf_as_na", get_pd_option("mode.use_inf_as_na", False)
         )
         super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
@@ -300,10 +302,13 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
         if func_name == "custom_reduction":
             empty_series = build_series(series, ensure_string=True)
-            result_scalar = getattr(self, "custom_reduction").__call_agg__(empty_series)
-            if hasattr(result_scalar, "to_pandas"):  # pragma: no cover
-                result_scalar = result_scalar.to_pandas()
-            result_dtype = pd.Series(result_scalar).dtype
+            custom_reduction_obj = getattr(self, "custom_reduction")
+            result_dtype = getattr(custom_reduction_obj, "result_dtype", None)
+            if result_dtype is None:
+                result_scalar = custom_reduction_obj.__call_agg__(empty_series)
+                if hasattr(result_scalar, "to_pandas"):  # pragma: no cover
+                    result_scalar = result_scalar.to_pandas()
+                result_dtype = pd.Series(result_scalar).dtype
         else:
             result_dtype = _get_series_reduction_dtype(
                 series.dtype,
@@ -378,6 +383,10 @@ class CustomReduction:
     def __name__(self):
         return self.name
+    @property
+    def result_dtype(self):
+        return None
     def __call__(self, value):
         if isinstance(value, ENTITY_TYPE):
             from .custom_reduction import build_custom_reduction_result
@@ -512,7 +521,7 @@ class ReductionCompiler:
     def _check_function_valid(cls, func):
         if isinstance(func, functools.partial):
             return cls._check_function_valid(func.func)
-        elif isinstance(func, (CustomReduction, ReductionCallable)):
+        elif not hasattr(func, "__code__"):
             return
         func_code = func.__code__

maxframe/dataframe/reduction/mode.py ADDED Viewed

@@ -0,0 +1,144 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from ... import opcodes
+from ...core import OutputType, get_output_types
+from ...serialization.serializables import BoolField, Int32Field
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import parse_index, validate_axis
+class DataFrameMode(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.MODE
+    axis = Int32Field("axis", default=None)
+    numeric_only = BoolField("numeric_only", default=False)
+    dropna = BoolField("dropna", default=True)
+    combine_size = Int32Field("combine_size", default=None)
+    def __call__(self, in_obj):
+        self._output_types = get_output_types(in_obj)
+        params = in_obj.params
+        shape = list(in_obj.shape)
+        shape[self.axis] = np.nan
+        params["shape"] = tuple(shape)
+        if self.axis == 0:
+            pd_idx = in_obj.index_value.to_pandas()[:0]
+            params["index_value"] = parse_index(pd_idx)
+        else:
+            pd_idx = in_obj.columns_value.to_pandas()[:0]
+            params["columns_value"] = parse_index(pd_idx)
+            params["dtypes"] = None
+        return self.new_tileable([in_obj], **params)
+def mode_dataframe(df, axis=0, numeric_only=False, dropna=True, combine_size=None):
+    """
+    Get the mode(s) of each element along the selected axis.
+    The mode of a set of values is the value that appears most often.
+    It can be multiple values.
+    Parameters
+    ----------
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        The axis to iterate over while searching for the mode:
+        * 0 or 'index' : get mode of each column
+        * 1 or 'columns' : get mode of each row.
+    numeric_only : bool, default False
+        If True, only apply to numeric columns.
+    dropna : bool, default True
+        Don't consider counts of NaN/NaT.
+    Returns
+    -------
+    DataFrame
+        The modes of each column or row.
+    See Also
+    --------
+    Series.mode : Return the highest frequency value in a Series.
+    Series.value_counts : Return the counts of values in a Series.
+    Examples
+    --------
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame([('bird', 2, 2),
+    ...                    ('mammal', 4, mt.nan),
+    ...                    ('arthropod', 8, 0),
+    ...                    ('bird', 2, mt.nan)],
+    ...                   index=('falcon', 'horse', 'spider', 'ostrich'),
+    ...                   columns=('species', 'legs', 'wings'))
+    >>> df.execute()
+               species  legs  wings
+    falcon        bird     2    2.0
+    horse       mammal     4    NaN
+    spider   arthropod     8    0.0
+    ostrich       bird     2    NaN
+    By default, missing values are not considered, and the modes of wings
+    are both 0 and 2. Because the resulting DataFrame has two rows,
+    the second row of ``species`` and ``legs`` contains ``NaN``.
+    >>> df.mode().execute()
+      species  legs  wings
+    0    bird   2.0    0.0
+    1     NaN   NaN    2.0
+    Setting ``dropna=False``, ``NaN`` values are considered and they can be
+    the mode (like for wings).
+    >>> df.mode(dropna=False).execute()
+      species  legs  wings
+    0    bird     2    NaN
+    Setting ``numeric_only=True``, only the mode of numeric columns is
+    computed, and columns of other types are ignored.
+    >>> df.mode(numeric_only=True).execute()
+       legs  wings
+    0   2.0    0.0
+    1   NaN    2.0
+    To compute the mode over columns and not rows, use the axis parameter:
+    >>> df.mode(axis='columns', numeric_only=True).execute()
+               0    1
+    falcon   2.0  NaN
+    horse    4.0  NaN
+    spider   0.0  8.0
+    ostrich  2.0  NaN
+    """
+    op = DataFrameMode(
+        axis=validate_axis(axis),
+        numeric_only=numeric_only,
+        dropna=dropna,
+        combine_size=combine_size,
+        output_types=[OutputType.dataframe],
+    )
+    return op(df)
+def mode_series(series, dropna=True, combine_size=None):
+    """
+    Return the mode(s) of the Series.
+    The mode is the value that appears most often. There can be multiple modes.
+    Always returns Series even if only one value is returned.
+    Parameters
+    ----------
+    dropna : bool, default True
+        Don't consider counts of NaN/NaT.
+    Returns
+    -------
+    Series
+        Modes of the Series in sorted order.
+    """
+    op = DataFrameMode(
+        axis=0,
+        dropna=dropna,
+        combine_size=combine_size,
+        output_types=[OutputType.series],
+    )
+    return op(series)

maxframe/dataframe/reduction/nunique.py CHANGED Viewed

@@ -20,8 +20,9 @@ except ImportError:  # pragma: no cover
 from ... import opcodes
 from ...config import options
 from ...core import OutputType
-from ...serialization.serializables import BoolField
+from ...serialization.serializables import BoolField, StringField
 from ...utils import lazy_import
+from ..utils import validate_dtype_backend
 from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
 cudf = lazy_import("cudf")
@@ -32,7 +33,13 @@ class DataFrameNunique(DataFrameReduction, DataFrameReductionMixin):
     _func_name = "nunique"
     dropna = BoolField("dropna", default=None)
-    use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
+    dtype_backend = StringField(
+        "dtype_backend", on_deserialize=validate_dtype_backend, default=None
+    )
+    def __init__(self, dtype_backend=None, **kw):
+        dtype_backend = validate_dtype_backend(dtype_backend)
+        super().__init__(dtype_backend=dtype_backend, **kw)
     @property
     def is_atomic(self):
@@ -137,6 +144,6 @@ def nunique_series(series, dropna=True):
     op = DataFrameNunique(
         dropna=dropna,
         output_types=[OutputType.scalar],
-        use_arrow_dtype=options.dataframe.use_arrow_dtype,
+        dtype_backend=options.dataframe.dtype_backend,
     )
     return op(series)

maxframe/dataframe/reduction/tests/test_reduction.py CHANGED Viewed

@@ -26,6 +26,7 @@ from .... import dataframe as md
 from ....lib.dtypes_extension import ArrowDtype
 from ....tensor import Tensor
 from ....tests.utils import assert_mf_index_dtype
+from ....udf import ODPSFunction
 from ...core import DataFrame, IndexValue, OutputType, Series
 from ...datasource.dataframe import from_pandas as from_pandas_df
 from ...datasource.series import from_pandas as from_pandas_series
@@ -527,3 +528,14 @@ def test_custom_aggregation():
         assert result.agg_funcs[0].agg_func_name == "custom_reduction"
         assert isinstance(result.agg_funcs[0].custom_reduction, MockReduction2)
         assert result.agg_funcs[0].output_limit == 2
+def test_aggregation_with_odps_function():
+    odps_func = ODPSFunction("test_odps_udaf", dtype=np.float64)
+    for ndim in [1, 2]:
+        compiler = ReductionCompiler()
+        compiler.add_function(odps_func, ndim=ndim)
+        result = compiler.compile()
+        assert result.agg_funcs[0].map_func_name == "custom_reduction"
+        assert result.agg_funcs[0].agg_func_name == "custom_reduction"
+        assert isinstance(result.agg_funcs[0].custom_reduction, ODPSFunction)

maxframe/dataframe/sort/__init__.py CHANGED Viewed

@@ -12,21 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .rank import DataFrameRank
 from .sort_index import DataFrameSortIndex
 from .sort_values import DataFrameSortValues
 def _install():
-    from ..core import DATAFRAME_TYPE, SERIES_TYPE
-    from .argsort import series_argsort
+    from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
+    from .argsort import index_argsort, series_argsort
     from .nlargest import df_nlargest, series_nlargest
     from .nsmallest import df_nsmallest, series_nsmallest
+    from .rank import rank
     from .sort_index import sort_index
     from .sort_values import dataframe_sort_values, series_sort_values
     for cls in DATAFRAME_TYPE:
         setattr(cls, "nlargest", df_nlargest)
         setattr(cls, "nsmallest", df_nsmallest)
+        setattr(cls, "rank", rank)
         setattr(cls, "sort_values", dataframe_sort_values)
         setattr(cls, "sort_index", sort_index)
@@ -34,9 +37,13 @@ def _install():
         setattr(cls, "argsort", series_argsort)
         setattr(cls, "nlargest", series_nlargest)
         setattr(cls, "nsmallest", series_nsmallest)
+        setattr(cls, "rank", rank)
         setattr(cls, "sort_values", series_sort_values)
         setattr(cls, "sort_index", sort_index)
+    for cls in INDEX_TYPE:
+        setattr(cls, "argsort", index_argsort)
 _install()
 del _install

maxframe/dataframe/sort/argsort.py CHANGED Viewed

@@ -40,7 +40,7 @@ def series_argsort(series, axis=0, kind="quicksort", order=None, stable=None):
     See Also
     --------
-    numpy.ndarray.argsort : Returns the indices that would sort this array.
+    maxframe.tensor.argsort : Returns the indices that would sort this array.
     Examples
     --------
@@ -60,3 +60,9 @@ def series_argsort(series, axis=0, kind="quicksort", order=None, stable=None):
     axis = 0
     t = mt.argsort(series.to_tensor(), axis=axis, kind=kind)
     return series_from_tensor(t, index=series.index)
+def index_argsort(index, *args, **kwargs):
+    from ... import tensor as mt
+    return mt.argsort(index.to_tensor(), *args, **kwargs)

maxframe/dataframe/sort/core.py CHANGED Viewed

@@ -32,6 +32,6 @@ class DataFrameSortOperator(DataFrameOperator):
     na_position = StringField("na_position")
     ignore_index = BoolField("ignore_index")
     parallel_kind = StringField("parallel_kind")
-    psrs_kinds = ListField("psrs_kinds", FieldTypes.string)
+    psrs_kinds = ListField("psrs_kinds", FieldTypes.string, default=None)
     nrows = Int64Field("nrows", default=None)
     keep_kind = StringField("keep_kind", default="head")

maxframe/dataframe/sort/rank.py ADDED Viewed

@@ -0,0 +1,147 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pandas as pd
+from ...serialization.serializables import BoolField, StringField
+from ..operators import DataFrameOperatorMixin
+from .core import DataFrameSortOperator
+class DataFrameRank(DataFrameSortOperator, DataFrameOperatorMixin):
+    method = StringField("method", default=None)
+    numeric_only = BoolField("numeric_only", default=None)
+    pct = BoolField("pct", default=False)
+    @property
+    def na_option(self):
+        return self.na_position
+    def __call__(self, df_obj):
+        params = df_obj.params
+        if df_obj.ndim == 2:  # dataframe
+            if self.numeric_only:
+                sel_df = df_obj.select_dtypes(include=[np.number])
+                cols = sel_df.dtypes.index
+            else:
+                cols = df_obj.dtypes.index
+            params["dtypes"] = pd.Series([np.dtype(float)] * len(cols), index=cols)
+            return self.new_dataframe([df_obj], **params)
+        else:
+            params["dtypes"] = np.dtype(float)
+            return self.new_series([df_obj], **params)
+def rank(
+    df,
+    axis=0,
+    method="average",
+    numeric_only=False,
+    na_option="keep",
+    ascending=True,
+    pct=False,
+):
+    """
+    Compute numerical data ranks (1 through n) along axis.
+    By default, equal values are assigned a rank that is the average of the
+    ranks of those values.
+    Parameters
+    ----------
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        Index to direct ranking.
+    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+        How to rank the group of records that have the same value (i.e. ties):
+        * average: average rank of the group
+        * min: lowest rank in the group
+        * max: highest rank in the group
+        * first: ranks assigned in order they appear in the array
+        * dense: like 'min', but rank always increases by 1 between groups.
+    numeric_only : bool, optional
+        For DataFrame objects, rank only numeric columns if set to True.
+    na_option : {'keep', 'top', 'bottom'}, default 'keep'
+        How to rank NaN values:
+        * keep: assign NaN rank to NaN values
+        * top: assign lowest rank to NaN values
+        * bottom: assign highest rank to NaN values
+    ascending : bool, default True
+        Whether or not the elements should be ranked in ascending order.
+    pct : bool, default False
+        Whether or not to display the returned rankings in percentile
+        form.
+    Returns
+    -------
+    same type as caller
+        Return a Series or DataFrame with data ranks as values.
+    See Also
+    --------
+    core.groupby.GroupBy.rank : Rank of values within each group.
+    Examples
+    --------
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
+    ...                                    'spider', 'snake'],
+    ...                         'Number_legs': [4, 2, 4, 8, mt.nan]})
+    >>> df.execute()
+        Animal  Number_legs
+    0      cat          4.0
+    1  penguin          2.0
+    2      dog          4.0
+    3   spider          8.0
+    4    snake          NaN
+    The following example shows how the method behaves with the above
+    parameters:
+    * default_rank: this is the default behaviour obtained without using
+      any parameter.
+    * max_rank: setting ``method = 'max'`` the records that have the
+      same values are ranked using the highest rank (e.g.: since 'cat'
+      and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
+    * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
+      with NaN values they are placed at the bottom of the ranking.
+    * pct_rank: when setting ``pct = True``, the ranking is expressed as
+      percentile rank.
+    >>> df['default_rank'] = df['Number_legs'].rank()
+    >>> df['max_rank'] = df['Number_legs'].rank(method='max')
+    >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
+    >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
+    >>> df.execute()
+        Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
+    0      cat          4.0           2.5       3.0        2.5     0.625
+    1  penguin          2.0           1.0       1.0        1.0     0.250
+    2      dog          4.0           2.5       3.0        2.5     0.625
+    3   spider          8.0           4.0       4.0        4.0     1.000
+    4    snake          NaN           NaN       NaN        5.0       NaN
+    """
+    op = DataFrameRank(
+        axis=axis,
+        method=method,
+        numeric_only=numeric_only,
+        na_position=na_option,
+        ascending=ascending,
+        pct=pct,
+    )
+    return op(df)

maxframe/dataframe/tseries/__init__.py CHANGED Viewed

@@ -11,3 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+def _install():
+    from ..core import DATAFRAME_TYPE, SERIES_TYPE
+    from .at_time import at_time
+    from .between_time import between_time
+    from .to_datetime import to_datetime  # noqa
+    for t in SERIES_TYPE:
+        t.at_time = at_time
+        t.between_time = between_time
+    for t in DATAFRAME_TYPE:
+        t.at_time = at_time
+        t.between_time = between_time
+_install()
+del _install

maxframe/dataframe/tseries/at_time.py ADDED Viewed

@@ -0,0 +1,61 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+def at_time(df_or_series, time, axis=0):
+    """
+    Select values at particular time of day (e.g., 9:30AM).
+    Parameters
+    ----------
+    time : datetime.time or str
+        The values to select.
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        For `Series` this parameter is unused and defaults to 0.
+    Returns
+    -------
+    Series or DataFrame
+    Raises
+    ------
+    TypeError
+        If the index is not a :class:`DatetimeIndex`.
+    See Also
+    --------
+    between_time : Select values between particular times of the day.
+    first : Select initial periods of time series based on a date offset.
+    last : Select final periods of time series based on a date offset.
+    DatetimeIndex.indexer_at_time : Get just the index locations for
+        values at particular time of the day.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> i = md.date_range('2018-04-09', periods=4, freq='12h')
+    >>> ts = md.DataFrame({'A': [1, 2, 3, 4]}, index=i)
+    >>> ts.execute()
+                         A
+    2018-04-09 00:00:00  1
+    2018-04-09 12:00:00  2
+    2018-04-10 00:00:00  3
+    2018-04-10 12:00:00  4
+    >>> ts.at_time('12:00').execute()
+                         A
+    2018-04-09 12:00:00  2
+    2018-04-10 12:00:00  4
+    """
+    return df_or_series.between_time(time, time, inclusive="both", axis=axis)