maxframe 2.2.0__cp38-cp38-win32.whl → 2.3.0rc1__cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (114) hide show
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/codegen/core.py +3 -2
  3. maxframe/codegen/spe/dataframe/merge.py +4 -0
  4. maxframe/codegen/spe/dataframe/misc.py +2 -0
  5. maxframe/codegen/spe/dataframe/reduction.py +18 -0
  6. maxframe/codegen/spe/dataframe/sort.py +9 -1
  7. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  8. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  9. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  10. maxframe/codegen/spe/tensor/datasource.py +1 -0
  11. maxframe/config/config.py +3 -0
  12. maxframe/conftest.py +10 -0
  13. maxframe/core/base.py +2 -1
  14. maxframe/core/entity/tileables.py +2 -0
  15. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  16. maxframe/core/graph/entity.py +7 -1
  17. maxframe/core/mode.py +6 -1
  18. maxframe/dataframe/__init__.py +2 -2
  19. maxframe/dataframe/arithmetic/__init__.py +4 -0
  20. maxframe/dataframe/arithmetic/maximum.py +33 -0
  21. maxframe/dataframe/arithmetic/minimum.py +33 -0
  22. maxframe/dataframe/core.py +98 -106
  23. maxframe/dataframe/datasource/core.py +6 -0
  24. maxframe/dataframe/datasource/direct.py +57 -0
  25. maxframe/dataframe/datasource/read_csv.py +19 -11
  26. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  27. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  28. maxframe/dataframe/datasource/read_parquet.py +38 -39
  29. maxframe/dataframe/datastore/__init__.py +6 -0
  30. maxframe/dataframe/datastore/direct.py +268 -0
  31. maxframe/dataframe/datastore/to_odps.py +6 -0
  32. maxframe/dataframe/extensions/flatjson.py +2 -1
  33. maxframe/dataframe/groupby/__init__.py +5 -1
  34. maxframe/dataframe/groupby/aggregation.py +10 -6
  35. maxframe/dataframe/groupby/apply_chunk.py +1 -3
  36. maxframe/dataframe/groupby/core.py +20 -4
  37. maxframe/dataframe/indexing/__init__.py +2 -1
  38. maxframe/dataframe/indexing/insert.py +45 -17
  39. maxframe/dataframe/merge/__init__.py +3 -0
  40. maxframe/dataframe/merge/combine.py +244 -0
  41. maxframe/dataframe/misc/__init__.py +14 -3
  42. maxframe/dataframe/misc/check_unique.py +41 -10
  43. maxframe/dataframe/misc/drop.py +31 -0
  44. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  45. maxframe/dataframe/misc/map.py +31 -18
  46. maxframe/dataframe/misc/repeat.py +159 -0
  47. maxframe/dataframe/misc/tests/test_misc.py +35 -1
  48. maxframe/dataframe/missing/checkna.py +3 -2
  49. maxframe/dataframe/reduction/__init__.py +10 -5
  50. maxframe/dataframe/reduction/aggregation.py +6 -6
  51. maxframe/dataframe/reduction/argmax.py +7 -4
  52. maxframe/dataframe/reduction/argmin.py +7 -4
  53. maxframe/dataframe/reduction/core.py +18 -9
  54. maxframe/dataframe/reduction/mode.py +144 -0
  55. maxframe/dataframe/reduction/nunique.py +10 -3
  56. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  57. maxframe/dataframe/sort/__init__.py +9 -2
  58. maxframe/dataframe/sort/argsort.py +7 -1
  59. maxframe/dataframe/sort/core.py +1 -1
  60. maxframe/dataframe/sort/rank.py +147 -0
  61. maxframe/dataframe/tseries/__init__.py +19 -0
  62. maxframe/dataframe/tseries/at_time.py +61 -0
  63. maxframe/dataframe/tseries/between_time.py +122 -0
  64. maxframe/dataframe/utils.py +30 -26
  65. maxframe/learn/contrib/llm/core.py +16 -7
  66. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  67. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  68. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  69. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  70. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  71. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  73. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  74. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  75. maxframe/learn/contrib/llm/models/managed.py +76 -11
  76. maxframe/learn/contrib/llm/models/openai.py +72 -0
  77. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  78. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  79. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  80. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  81. maxframe/learn/contrib/llm/text.py +348 -42
  82. maxframe/learn/contrib/models.py +4 -1
  83. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  84. maxframe/learn/contrib/xgboost/core.py +31 -7
  85. maxframe/learn/contrib/xgboost/predict.py +4 -2
  86. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  87. maxframe/learn/contrib/xgboost/train.py +2 -0
  88. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  89. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  90. maxframe/learn/utils/__init__.py +1 -0
  91. maxframe/learn/utils/extmath.py +42 -9
  92. maxframe/learn/utils/odpsio.py +80 -11
  93. maxframe/lib/filesystem/_oss_lib/common.py +2 -0
  94. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  95. maxframe/opcodes.py +9 -1
  96. maxframe/remote/core.py +4 -0
  97. maxframe/serialization/core.cp38-win32.pyd +0 -0
  98. maxframe/serialization/tests/test_serial.py +2 -2
  99. maxframe/tensor/arithmetic/__init__.py +1 -1
  100. maxframe/tensor/arithmetic/core.py +2 -2
  101. maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
  102. maxframe/tensor/core.py +3 -0
  103. maxframe/tensor/misc/copyto.py +1 -1
  104. maxframe/tests/test_udf.py +61 -0
  105. maxframe/tests/test_utils.py +8 -5
  106. maxframe/udf.py +103 -7
  107. maxframe/utils.py +61 -8
  108. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
  109. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
  110. maxframe_client/session/task.py +8 -1
  111. maxframe_client/tests/test_session.py +24 -0
  112. maxframe/dataframe/arrays.py +0 -864
  113. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  114. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,251 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...serialization.serializables import AnyField, StringField
17
+ from ..core import DATAFRAME_TYPE, SERIES_TYPE
18
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
19
+
20
+
21
+ class DataFrameInferDtypes(DataFrameOperator, DataFrameOperatorMixin):
22
+ _op_type_ = opcodes.DATAFRAME_INFER_DTYPES
23
+
24
+ infer_method = StringField("infer_method")
25
+ infer_kwargs = AnyField("infer_kwargs")
26
+
27
+ infer_stage = StringField("infer_stage", default=None)
28
+
29
+ def __init__(self, output_types=None, **kw):
30
+ super().__init__(_output_types=output_types, **kw)
31
+
32
+ def __call__(self, df):
33
+ if isinstance(df, DATAFRAME_TYPE):
34
+ return self.new_dataframe(
35
+ [df],
36
+ shape=df.shape,
37
+ dtypes=None,
38
+ index_value=df.index_value,
39
+ columns_value=df.columns_value,
40
+ )
41
+ else:
42
+ assert isinstance(df, SERIES_TYPE)
43
+ return self.new_series(
44
+ [df],
45
+ shape=df.shape,
46
+ dtype=None,
47
+ name=df.name,
48
+ index_value=df.index_value,
49
+ )
50
+
51
+
52
+ def convert_dtypes(
53
+ df_or_series,
54
+ infer_objects=True,
55
+ convert_string=True,
56
+ convert_integer=True,
57
+ convert_boolean=True,
58
+ convert_floating=True,
59
+ dtype_backend="numpy",
60
+ ):
61
+ """
62
+ Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
63
+
64
+ Parameters
65
+ ----------
66
+ infer_objects : bool, default True
67
+ Whether object dtypes should be converted to the best possible types.
68
+ convert_string : bool, default True
69
+ Whether object dtypes should be converted to ``StringDtype()``.
70
+ convert_integer : bool, default True
71
+ Whether, if possible, conversion can be done to integer extension types.
72
+ convert_boolean : bool, default True
73
+ Whether object dtypes should be converted to ``BooleanDtype()``.
74
+ convert_floating : bool, default True
75
+ Whether, if possible, conversion can be done to floating extension types.
76
+ If `convert_integer` is also True, preference will be given to integer
77
+ dtypes if the floats can be faithfully cast to integers.
78
+
79
+ Returns
80
+ -------
81
+ Series or DataFrame
82
+ Copy of input object with new dtype.
83
+
84
+ See Also
85
+ --------
86
+ infer_objects : Infer dtypes of objects.
87
+ to_datetime : Convert argument to datetime.
88
+ to_timedelta : Convert argument to timedelta.
89
+ to_numeric : Convert argument to a numeric type.
90
+
91
+ Notes
92
+ -----
93
+ By default, ``convert_dtypes`` will attempt to convert a Series (or each
94
+ Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
95
+ ``convert_string``, ``convert_integer``, ``convert_boolean`` and
96
+ ``convert_floating``, it is possible to turn off individual conversions
97
+ to ``StringDtype``, the integer extension types, ``BooleanDtype``
98
+ or floating extension types, respectively.
99
+
100
+ For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
101
+ rules as during normal Series/DataFrame construction. Then, if possible,
102
+ convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
103
+ or floating extension type, otherwise leave as ``object``.
104
+
105
+ If the dtype is integer, convert to an appropriate integer extension type.
106
+
107
+ If the dtype is numeric, and consists of all integers, convert to an
108
+ appropriate integer extension type. Otherwise, convert to an
109
+ appropriate floating extension type.
110
+
111
+ .. versionchanged:: 1.2
112
+ Starting with pandas 1.2, this method also converts float columns
113
+ to the nullable floating extension type.
114
+
115
+ In the future, as new dtypes are added that support ``pd.NA``, the results
116
+ of this method will change to support those new dtypes.
117
+
118
+ Examples
119
+ --------
120
+ >>> import maxframe.tensor as mt
121
+ >>> import maxframe.dataframe as md
122
+ >>> df = md.DataFrame(
123
+ ... {
124
+ ... "a": md.Series([1, 2, 3], dtype=mt.dtype("int32")),
125
+ ... "b": md.Series(["x", "y", "z"], dtype=mt.dtype("O")),
126
+ ... "c": md.Series([True, False, mt.nan], dtype=mt.dtype("O")),
127
+ ... "d": md.Series(["h", "i", mt.nan], dtype=mt.dtype("O")),
128
+ ... "e": md.Series([10, mt.nan, 20], dtype=mt.dtype("float")),
129
+ ... "f": md.Series([mt.nan, 100.5, 200], dtype=mt.dtype("float")),
130
+ ... }
131
+ ... )
132
+
133
+ Start with a DataFrame with default dtypes.
134
+
135
+ >>> df.execute()
136
+ a b c d e f
137
+ 0 1 x True h 10.0 NaN
138
+ 1 2 y False i NaN 100.5
139
+ 2 3 z NaN NaN 20.0 200.0
140
+
141
+ >>> df.dtypes.execute()
142
+ a int32
143
+ b object
144
+ c object
145
+ d object
146
+ e float64
147
+ f float64
148
+ dtype: object
149
+
150
+ Convert the DataFrame to use best possible dtypes.
151
+
152
+ >>> dfn = df.convert_dtypes()
153
+ >>> dfn.execute()
154
+ a b c d e f
155
+ 0 1 x True h 10 <NA>
156
+ 1 2 y False i <NA> 100.5
157
+ 2 3 z <NA> <NA> 20 200.0
158
+
159
+ >>> dfn.dtypes.execute()
160
+ a Int32
161
+ b string
162
+ c boolean
163
+ d string
164
+ e Int64
165
+ f Float64
166
+ dtype: object
167
+
168
+ Start with a Series of strings and missing data represented by ``np.nan``.
169
+
170
+ >>> s = md.Series(["a", "b", mt.nan])
171
+ >>> s.execute()
172
+ 0 a
173
+ 1 b
174
+ 2 NaN
175
+ dtype: object
176
+
177
+ Obtain a Series with dtype ``StringDtype``.
178
+
179
+ >>> s.convert_dtypes().execute()
180
+ 0 a
181
+ 1 b
182
+ 2 <NA>
183
+ dtype: string
184
+ """
185
+ dtype_backend = "numpy" if dtype_backend == "numpy_nullable" else dtype_backend
186
+ op = DataFrameInferDtypes(
187
+ infer_method="convert_dtypes",
188
+ infer_kwargs=dict(
189
+ infer_objects=infer_objects,
190
+ convert_string=convert_string,
191
+ convert_integer=convert_integer,
192
+ convert_boolean=convert_boolean,
193
+ convert_floating=convert_floating,
194
+ dtype_backend=dtype_backend,
195
+ ),
196
+ )
197
+ return op(df_or_series)
198
+
199
+
200
+ def infer_objects(df_or_series, copy=True):
201
+ """
202
+ Attempt to infer better dtypes for object columns.
203
+
204
+ Attempts soft conversion of object-dtyped
205
+ columns, leaving non-object and unconvertible
206
+ columns unchanged. The inference rules are the
207
+ same as during normal Series/DataFrame construction.
208
+
209
+ Returns
210
+ -------
211
+ converted : same type as input object
212
+
213
+ See Also
214
+ --------
215
+ to_datetime : Convert argument to datetime.
216
+ to_timedelta : Convert argument to timedelta.
217
+ to_numeric : Convert argument to numeric type.
218
+ convert_dtypes : Convert argument to best possible dtype.
219
+
220
+ Examples
221
+ --------
222
+ >>> import maxframe.dataframe as md
223
+ >>> df = md.DataFrame({"A": ["a", 1, 2, 3]})
224
+ >>> df = df.iloc[1:]
225
+ >>> df.execute()
226
+ A
227
+ 1 1
228
+ 2 2
229
+ 3 3
230
+
231
+ >>> df.dtypes.execute()
232
+ A object
233
+ dtype: object
234
+
235
+ >>> df.infer_objects().dtypes.execute()
236
+ A int64
237
+ dtype: object
238
+ """
239
+ if (isinstance(df_or_series, SERIES_TYPE) and df_or_series.dtype != "O") or (
240
+ isinstance(df_or_series, DATAFRAME_TYPE)
241
+ and all(dt != "O" for dt in df_or_series.dtypes)
242
+ ):
243
+ # no objects to cast
244
+ return df_or_series
245
+
246
+ _ = copy # in MaxFrame data are immutable, thus ignore the parameter
247
+ op = DataFrameInferDtypes(
248
+ infer_method="infer_objects",
249
+ infer_kwargs={},
250
+ )
251
+ return op(df_or_series)
@@ -21,8 +21,8 @@ import pandas as pd
21
21
  from ... import opcodes
22
22
  from ...core import EntityData, OutputType
23
23
  from ...serialization.serializables import AnyField, KeyField, StringField
24
- from ...udf import BuiltinFunction, MarkedFunction
25
- from ...utils import quiet_stdio
24
+ from ...udf import BuiltinFunction, MarkedFunction, ODPSFunction
25
+ from ...utils import make_dtype, quiet_stdio
26
26
  from ..core import SERIES_TYPE
27
27
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
28
28
  from ..utils import build_series, copy_func_scheduling_hints
@@ -40,6 +40,7 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
40
40
  if not self.output_types:
41
41
  self.output_types = [OutputType.series]
42
42
  if hasattr(self, "arg"):
43
+ self.arg = ODPSFunction.wrap(self.arg)
43
44
  copy_func_scheduling_hints(self.arg, self)
44
45
 
45
46
  @classmethod
@@ -55,25 +56,34 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
55
56
  ) and not isinstance(self.arg, BuiltinFunction)
56
57
 
57
58
  def __call__(self, series, dtype, skip_infer=False):
58
- if dtype is None and not skip_infer:
59
- inferred_dtype = None
60
- if callable(self.arg):
59
+ if dtype is not None:
60
+ dtype = make_dtype(dtype)
61
+ else:
62
+ # obtain dtype from existing hints
63
+ if isinstance(self.arg, ODPSFunction):
64
+ if self.arg.result_dtype is not None:
65
+ dtype = self.arg.result_dtype
66
+ elif callable(self.arg):
61
67
  # arg is a function, try to inspect the signature
62
68
  sig = inspect.signature(self.arg)
63
69
  return_type = sig.return_annotation
64
70
  if return_type is not inspect._empty:
65
- inferred_dtype = np.dtype(return_type)
66
- else:
67
- try:
68
- with quiet_stdio():
69
- # try to infer dtype by calling the function
70
- inferred_dtype = (
71
- build_series(series)
72
- .map(self.arg, na_action=self.na_action)
73
- .dtype
74
- )
75
- except: # noqa: E722 # nosec
76
- pass
71
+ dtype = np.dtype(return_type)
72
+
73
+ err_prefix = None
74
+ if dtype is None and not skip_infer:
75
+ inferred_dtype = None
76
+ if callable(self.arg):
77
+ try:
78
+ with quiet_stdio():
79
+ # try to infer dtype by calling the function
80
+ inferred_dtype = (
81
+ build_series(series)
82
+ .map(self.arg, na_action=self.na_action)
83
+ .dtype
84
+ )
85
+ except: # noqa: E722 # nosec
86
+ pass
77
87
  else:
78
88
  if isinstance(self.arg, MutableMapping):
79
89
  inferred_dtype = pd.Series(self.arg).dtype
@@ -86,13 +96,16 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
86
96
  # but for int, due to the nan which may occur,
87
97
  # we cannot infer the dtype
88
98
  dtype = inferred_dtype
99
+ else:
100
+ err_prefix = "int type may not be exact"
89
101
  else:
90
102
  dtype = inferred_dtype
91
103
 
92
104
  if dtype is None:
93
105
  if not skip_infer:
106
+ err_prefix = err_prefix or "cannot infer dtype"
94
107
  raise ValueError(
95
- "cannot infer dtype, it needs to be specified manually for `map`"
108
+ f"{err_prefix}, it needs to be specified manually for `map`"
96
109
  )
97
110
  else:
98
111
  dtype = np.int64 if dtype is int else dtype
@@ -0,0 +1,159 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ from pandas.api.types import is_list_like
19
+
20
+ from ... import opcodes
21
+ from ...core import ENTITY_TYPE, EntityData, get_output_types
22
+ from ...serialization.serializables import AnyField
23
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
+ from ..utils import parse_index, validate_axis
25
+
26
+
27
+ class DataFrameRepeat(DataFrameOperator, DataFrameOperatorMixin):
28
+ _op_type_ = opcodes.REPEAT
29
+
30
+ repeats = AnyField("repeats", default=None)
31
+
32
+ def __init__(self, output_types=None, **kw):
33
+ super().__init__(_output_types=output_types, **kw)
34
+
35
+ @classmethod
36
+ def _set_inputs(cls, op: "DataFrameRepeat", inputs: List[EntityData]):
37
+ super()._set_inputs(op, inputs)
38
+ if isinstance(op.repeats, ENTITY_TYPE):
39
+ op.repeats = inputs[1]
40
+
41
+ def __call__(self, obj, repeats):
42
+ self._output_types = get_output_types(obj)
43
+ test_index = obj.index_value.to_pandas()[:0]
44
+
45
+ params = obj.params
46
+ params["index_value"] = parse_index(test_index, obj, type(self), self.repeats)
47
+ params["shape"] = (np.nan,)
48
+
49
+ inputs = [obj]
50
+ if isinstance(repeats, ENTITY_TYPE):
51
+ inputs.append(repeats)
52
+ return self.new_tileable(inputs, **params)
53
+
54
+
55
+ def _repeat(obj, repeats, axis=None):
56
+ from ...tensor.datasource import tensor
57
+
58
+ axis = validate_axis(axis or 0, obj)
59
+ if is_list_like(repeats):
60
+ repeats = tensor(repeats)
61
+ op = DataFrameRepeat(repeats=repeats, axis=axis)
62
+ return op(obj, repeats)
63
+
64
+
65
+ def series_repeat(obj, repeats, axis=None):
66
+ """
67
+ Repeat elements of a Series.
68
+
69
+ Returns a new Series where each element of the current Series
70
+ is repeated consecutively a given number of times.
71
+
72
+ Parameters
73
+ ----------
74
+ repeats : int or array of ints
75
+ The number of repetitions for each element. This should be a
76
+ non-negative integer. Repeating 0 times will return an empty
77
+ Series.
78
+ axis : None
79
+ Must be ``None``. Has no effect but is accepted for compatibility
80
+ with numpy.
81
+
82
+ Returns
83
+ -------
84
+ Series
85
+ Newly created Series with repeated elements.
86
+
87
+ See Also
88
+ --------
89
+ Index.repeat : Equivalent function for Index.
90
+ numpy.repeat : Similar method for :class:`numpy.ndarray`.
91
+
92
+ Examples
93
+ --------
94
+ >>> import maxframe.dataframe as md
95
+ >>> s = md.Series(['a', 'b', 'c'])
96
+ >>> s.execute()
97
+ 0 a
98
+ 1 b
99
+ 2 c
100
+ dtype: object
101
+ >>> s.repeat(2).execute()
102
+ 0 a
103
+ 0 a
104
+ 1 b
105
+ 1 b
106
+ 2 c
107
+ 2 c
108
+ dtype: object
109
+ >>> s.repeat([1, 2, 3]).execute()
110
+ 0 a
111
+ 1 b
112
+ 1 b
113
+ 2 c
114
+ 2 c
115
+ 2 c
116
+ dtype: object
117
+ """
118
+ return _repeat(obj, repeats, axis=axis)
119
+
120
+
121
+ def index_repeat(obj, repeats, axis=None):
122
+ """
123
+ Repeat elements of an Index.
124
+
125
+ Returns a new Index where each element of the current Index
126
+ is repeated consecutively a given number of times.
127
+
128
+ Parameters
129
+ ----------
130
+ repeats : int or array of ints
131
+ The number of repetitions for each element. This should be a
132
+ non-negative integer. Repeating 0 times will return an empty
133
+ Index.
134
+ axis : None
135
+ Must be ``None``. Has no effect but is accepted for compatibility
136
+ with numpy.
137
+
138
+ Returns
139
+ -------
140
+ repeated_index : Index
141
+ Newly created Index with repeated elements.
142
+
143
+ See Also
144
+ --------
145
+ Series.repeat : Equivalent function for Series.
146
+ numpy.repeat : Similar method for :class:`numpy.ndarray`.
147
+
148
+ Examples
149
+ --------
150
+ >>> import maxframe.dataframe as md
151
+ >>> idx = md.Index(['a', 'b', 'c'])
152
+ >>> idx.execute()
153
+ Index(['a', 'b', 'c'], dtype='object')
154
+ >>> idx.repeat(2).execute()
155
+ Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
156
+ >>> idx.repeat([1, 2, 3]).execute()
157
+ Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
158
+ """
159
+ return _repeat(obj, repeats, axis=axis)
@@ -22,7 +22,7 @@ from .... import opcodes
22
22
  from ....core import OutputType
23
23
  from ....dataframe import DataFrame
24
24
  from ....tensor.core import TENSOR_TYPE
25
- from ....udf import with_running_options
25
+ from ....udf import ODPSFunction, with_running_options
26
26
  from ... import eval as maxframe_eval
27
27
  from ... import get_dummies, to_numeric
28
28
  from ...arithmetic import DataFrameGreater, DataFrameLess
@@ -613,3 +613,37 @@ def test_pivot_table():
613
613
  t = df.pivot_table(index=["A", "B"], columns="C", aggfunc="sum")
614
614
  assert isinstance(t.op, DataFramePivotTable)
615
615
  assert t.shape == (np.nan, np.nan)
616
+
617
+
618
+ def test_map_with_functions():
619
+ raw = pd.Series([1, 2, 3], name="s_name")
620
+ series = from_pandas_series(raw, chunk_size=2)
621
+
622
+ # inferred type may not be exact
623
+ def fn1(val):
624
+ return val
625
+
626
+ with pytest.raises(ValueError, match="int type"):
627
+ series.map(fn1)
628
+ mapped = series.map(fn1, dtype="float64", skip_infer=True)
629
+ assert mapped.dtype == np.dtype("float64")
630
+
631
+ # test when type infer is valid
632
+ def fn2(val):
633
+ return val * 1.0
634
+
635
+ mapped = series.map(fn2)
636
+ assert mapped.dtype == np.dtype("float64")
637
+
638
+ # test function with type annotations
639
+ def fn3(val) -> int:
640
+ return val
641
+
642
+ mapped = series.map(fn3)
643
+ assert mapped.dtype == np.dtype("int64")
644
+
645
+ # test odps function
646
+ odps_func = ODPSFunction("test_odps_udf", dtype=np.float64)
647
+ mapped = series.map(odps_func)
648
+ assert isinstance(mapped.op.arg, ODPSFunction)
649
+ assert mapped.dtype == np.dtype("float64")
@@ -22,6 +22,7 @@ from ... import tensor as mt
22
22
  from ...core import ENTITY_TYPE, OutputType
23
23
  from ...serialization.serializables import BoolField
24
24
  from ...tensor.core import TENSOR_TYPE
25
+ from ...utils import get_pd_option
25
26
  from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE, MultiIndex
26
27
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
27
28
 
@@ -138,7 +139,7 @@ def isna(obj):
138
139
  2 True
139
140
  dtype: bool
140
141
  """
141
- use_inf_as_na = pd.get_option("mode.use_inf_as_na")
142
+ use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
142
143
  if isinstance(obj, MultiIndex):
143
144
  raise NotImplementedError("isna is not defined for MultiIndex")
144
145
  elif isinstance(obj, ENTITY_TYPE):
@@ -213,7 +214,7 @@ def notna(obj):
213
214
  2 False
214
215
  dtype: bool
215
216
  """
216
- use_inf_as_na = pd.get_option("mode.use_inf_as_na")
217
+ use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
217
218
  if isinstance(obj, MultiIndex):
218
219
  raise NotImplementedError("isna is not defined for MultiIndex")
219
220
  elif isinstance(obj, ENTITY_TYPE):
@@ -17,7 +17,7 @@ from .all import DataFrameAll
17
17
  from .any import DataFrameAny
18
18
  from .argmax import DataFrameArgMax
19
19
  from .argmin import DataFrameArgMin
20
- from .core import CustomReduction
20
+ from .core import CustomReduction, NamedAgg
21
21
  from .count import DataFrameCount
22
22
  from .cummax import DataFrameCummax
23
23
  from .cummin import DataFrameCummin
@@ -31,6 +31,7 @@ from .max import DataFrameMax
31
31
  from .mean import DataFrameMean
32
32
  from .median import DataFrameMedian
33
33
  from .min import DataFrameMin
34
+ from .mode import DataFrameMode
34
35
  from .nunique import DataFrameNunique
35
36
  from .prod import DataFrameProd
36
37
  from .reduction_size import DataFrameSize
@@ -47,8 +48,8 @@ def _install():
47
48
  from .aggregation import aggregate
48
49
  from .all import all_dataframe, all_index, all_series
49
50
  from .any import any_dataframe, any_index, any_series
50
- from .argmax import argmax_series
51
- from .argmin import argmin_series
51
+ from .argmax import argmax_series_index
52
+ from .argmin import argmin_series_index
52
53
  from .count import count_dataframe, count_series
53
54
  from .cov import cov_dataframe, cov_series
54
55
  from .cummax import cummax
@@ -62,6 +63,7 @@ def _install():
62
63
  from .mean import mean_dataframe, mean_series
63
64
  from .median import median_dataframe, median_series
64
65
  from .min import min_dataframe, min_index, min_series
66
+ from .mode import mode_dataframe, mode_series
65
67
  from .nunique import nunique_dataframe, nunique_series
66
68
  from .prod import prod_dataframe, prod_series
67
69
  from .reduction_size import size_dataframe, size_series
@@ -76,8 +78,8 @@ def _install():
76
78
  ("aggregate", aggregate, aggregate),
77
79
  ("all", all_series, all_dataframe),
78
80
  ("any", any_series, any_dataframe),
79
- ("argmax", argmax_series, None),
80
- ("argmin", argmin_series, None),
81
+ ("argmax", argmax_series_index, None),
82
+ ("argmin", argmin_series_index, None),
81
83
  ("count", count_series, count_dataframe),
82
84
  ("cov", cov_series, cov_dataframe),
83
85
  ("cummax", cummax, cummax),
@@ -92,6 +94,7 @@ def _install():
92
94
  ("mean", mean_series, mean_dataframe),
93
95
  ("median", median_series, median_dataframe),
94
96
  ("min", min_series, min_dataframe),
97
+ ("mode", mode_series, mode_dataframe),
95
98
  ("nunique", nunique_series, nunique_dataframe),
96
99
  ("prod", prod_series, prod_dataframe),
97
100
  ("product", prod_series, prod_dataframe),
@@ -118,6 +121,8 @@ def _install():
118
121
  setattr(t, "any", any_index)
119
122
  setattr(t, "min", min_index)
120
123
  setattr(t, "max", max_index)
124
+ setattr(t, "argmin", argmin_series_index)
125
+ setattr(t, "argmax", argmax_series_index)
121
126
 
122
127
 
123
128
  _install()
@@ -38,7 +38,7 @@ from ...serialization.serializables import (
38
38
  )
39
39
  from ...typing_ import TileableType
40
40
  from ...udf import BuiltinFunction
41
- from ...utils import lazy_import, pd_release_version
41
+ from ...utils import get_pd_option, lazy_import, pd_release_version
42
42
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
43
43
  from ..utils import build_df, build_empty_df, build_series, parse_index, validate_axis
44
44
  from .core import (
@@ -92,8 +92,8 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
92
92
  _op_type_ = opcodes.AGGREGATE
93
93
 
94
94
  raw_func = AnyField("raw_func")
95
- raw_func_kw = DictField("raw_func_kw")
96
- func = AnyField("func")
95
+ raw_func_kw = DictField("raw_func_kw", default=None)
96
+ func = AnyField("func", default=None)
97
97
  func_rename = ListField("func_rename", default=None)
98
98
  axis = AnyField("axis", default=0)
99
99
  numeric_only = BoolField("numeric_only", default=None)
@@ -199,7 +199,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
199
199
  normalize_reduction_funcs(self, ndim=df.ndim)
200
200
  compile_reduction_funcs(self, df)
201
201
  if output_type is None or dtypes is None:
202
- with enter_mode(kernel=False, build=False):
202
+ with enter_mode(kernel=False, build=False, mock=True):
203
203
  dtypes, index = self._calc_result_shape(df)
204
204
  else:
205
205
  self.output_types = [output_type]
@@ -231,7 +231,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
231
231
  return self.new_series(
232
232
  [df],
233
233
  shape=new_shape,
234
- dtype=dtypes[0],
234
+ dtype=dtypes.iloc[0],
235
235
  name=dtypes.index[0],
236
236
  index_value=new_index,
237
237
  )
@@ -456,7 +456,7 @@ def aggregate(df, func=None, axis=0, **kw):
456
456
  min 1
457
457
  """
458
458
  axis = validate_axis(axis, df)
459
- use_inf_as_na = kw.pop("_use_inf_as_na", pd.get_option("mode.use_inf_as_na"))
459
+ use_inf_as_na = kw.pop("_use_inf_as_na", get_pd_option("mode.use_inf_as_na", False))
460
460
  if func == "unique":
461
461
  # workaround for direct call of unique function which
462
462
  # returns a tensor directly