maxframe 2.2.0__cp310-cp310-macosx_10_9_universal2.whl → 2.3.0rc1__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


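This version of maxframe might be problematic. More details are available from the registry's notice for this release (the original link was not preserved in this text capture).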

Files changed (114):
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/codegen/core.py +3 -2
  3. maxframe/codegen/spe/dataframe/merge.py +4 -0
  4. maxframe/codegen/spe/dataframe/misc.py +2 -0
  5. maxframe/codegen/spe/dataframe/reduction.py +18 -0
  6. maxframe/codegen/spe/dataframe/sort.py +9 -1
  7. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  8. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  9. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  10. maxframe/codegen/spe/tensor/datasource.py +1 -0
  11. maxframe/config/config.py +3 -0
  12. maxframe/conftest.py +10 -0
  13. maxframe/core/base.py +2 -1
  14. maxframe/core/entity/tileables.py +2 -0
  15. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  16. maxframe/core/graph/entity.py +7 -1
  17. maxframe/core/mode.py +6 -1
  18. maxframe/dataframe/__init__.py +2 -2
  19. maxframe/dataframe/arithmetic/__init__.py +4 -0
  20. maxframe/dataframe/arithmetic/maximum.py +33 -0
  21. maxframe/dataframe/arithmetic/minimum.py +33 -0
  22. maxframe/dataframe/core.py +98 -106
  23. maxframe/dataframe/datasource/core.py +6 -0
  24. maxframe/dataframe/datasource/direct.py +57 -0
  25. maxframe/dataframe/datasource/read_csv.py +19 -11
  26. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  27. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  28. maxframe/dataframe/datasource/read_parquet.py +38 -39
  29. maxframe/dataframe/datastore/__init__.py +6 -0
  30. maxframe/dataframe/datastore/direct.py +268 -0
  31. maxframe/dataframe/datastore/to_odps.py +6 -0
  32. maxframe/dataframe/extensions/flatjson.py +2 -1
  33. maxframe/dataframe/groupby/__init__.py +5 -1
  34. maxframe/dataframe/groupby/aggregation.py +10 -6
  35. maxframe/dataframe/groupby/apply_chunk.py +1 -3
  36. maxframe/dataframe/groupby/core.py +20 -4
  37. maxframe/dataframe/indexing/__init__.py +2 -1
  38. maxframe/dataframe/indexing/insert.py +45 -17
  39. maxframe/dataframe/merge/__init__.py +3 -0
  40. maxframe/dataframe/merge/combine.py +244 -0
  41. maxframe/dataframe/misc/__init__.py +14 -3
  42. maxframe/dataframe/misc/check_unique.py +41 -10
  43. maxframe/dataframe/misc/drop.py +31 -0
  44. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  45. maxframe/dataframe/misc/map.py +31 -18
  46. maxframe/dataframe/misc/repeat.py +159 -0
  47. maxframe/dataframe/misc/tests/test_misc.py +35 -1
  48. maxframe/dataframe/missing/checkna.py +3 -2
  49. maxframe/dataframe/reduction/__init__.py +10 -5
  50. maxframe/dataframe/reduction/aggregation.py +6 -6
  51. maxframe/dataframe/reduction/argmax.py +7 -4
  52. maxframe/dataframe/reduction/argmin.py +7 -4
  53. maxframe/dataframe/reduction/core.py +18 -9
  54. maxframe/dataframe/reduction/mode.py +144 -0
  55. maxframe/dataframe/reduction/nunique.py +10 -3
  56. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  57. maxframe/dataframe/sort/__init__.py +9 -2
  58. maxframe/dataframe/sort/argsort.py +7 -1
  59. maxframe/dataframe/sort/core.py +1 -1
  60. maxframe/dataframe/sort/rank.py +147 -0
  61. maxframe/dataframe/tseries/__init__.py +19 -0
  62. maxframe/dataframe/tseries/at_time.py +61 -0
  63. maxframe/dataframe/tseries/between_time.py +122 -0
  64. maxframe/dataframe/utils.py +30 -26
  65. maxframe/learn/contrib/llm/core.py +16 -7
  66. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  67. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  68. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  69. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  70. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  71. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  73. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  74. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  75. maxframe/learn/contrib/llm/models/managed.py +76 -11
  76. maxframe/learn/contrib/llm/models/openai.py +72 -0
  77. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  78. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  79. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  80. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  81. maxframe/learn/contrib/llm/text.py +348 -42
  82. maxframe/learn/contrib/models.py +4 -1
  83. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  84. maxframe/learn/contrib/xgboost/core.py +31 -7
  85. maxframe/learn/contrib/xgboost/predict.py +4 -2
  86. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  87. maxframe/learn/contrib/xgboost/train.py +2 -0
  88. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  89. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  90. maxframe/learn/utils/__init__.py +1 -0
  91. maxframe/learn/utils/extmath.py +42 -9
  92. maxframe/learn/utils/odpsio.py +80 -11
  93. maxframe/lib/filesystem/_oss_lib/common.py +2 -0
  94. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  95. maxframe/opcodes.py +9 -1
  96. maxframe/remote/core.py +4 -0
  97. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  98. maxframe/serialization/tests/test_serial.py +2 -2
  99. maxframe/tensor/arithmetic/__init__.py +1 -1
  100. maxframe/tensor/arithmetic/core.py +2 -2
  101. maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
  102. maxframe/tensor/core.py +3 -0
  103. maxframe/tensor/misc/copyto.py +1 -1
  104. maxframe/tests/test_udf.py +61 -0
  105. maxframe/tests/test_utils.py +8 -5
  106. maxframe/udf.py +103 -7
  107. maxframe/utils.py +61 -8
  108. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
  109. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
  110. maxframe_client/session/task.py +8 -1
  111. maxframe_client/tests/test_session.py +24 -0
  112. maxframe/dataframe/arrays.py +0 -864
  113. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  114. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -17,10 +17,10 @@ from typing import List
17
17
  import pandas as pd
18
18
 
19
19
  from ... import opcodes
20
- from ...core import EntityData
20
+ from ...core import EntityData, get_output_types
21
21
  from ...serialization.serializables import AnyField, BoolField, Int64Field
22
22
  from ...tensor.core import TENSOR_TYPE
23
- from ..core import SERIES_TYPE
23
+ from ..core import INDEX_TYPE, SERIES_TYPE
24
24
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
25
  from ..utils import build_empty_df, parse_index
26
26
 
@@ -29,9 +29,9 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
29
29
  _op_type_ = opcodes.INSERT
30
30
 
31
31
  loc = Int64Field("loc")
32
- column = AnyField("column")
33
- value = AnyField("value")
34
- allow_duplicates = BoolField("allow_duplicates")
32
+ column = AnyField("column", default=None)
33
+ value = AnyField("value", default=None)
34
+ allow_duplicates = BoolField("allow_duplicates", default=False)
35
35
 
36
36
  @classmethod
37
37
  def _set_inputs(cls, op: "DataFrameInsert", inputs: List[EntityData]):
@@ -40,6 +40,7 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
40
40
  op.value = op._inputs[-1]
41
41
 
42
42
  def __call__(self, df):
43
+ self._output_types = get_output_types(df)
43
44
  inputs = [df]
44
45
  if isinstance(self.value, (SERIES_TYPE, TENSOR_TYPE)):
45
46
  value_dtype = self.value.dtype
@@ -47,19 +48,27 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
47
48
  else:
48
49
  value_dtype = pd.Series(self.value).dtype
49
50
 
50
- empty_df = build_empty_df(df.dtypes)
51
- empty_df.insert(
52
- loc=self.loc,
53
- column=self.column,
54
- allow_duplicates=self.allow_duplicates,
55
- value=pd.Series([], dtype=value_dtype),
56
- )
57
-
58
51
  params = df.params
59
- params["columns_value"] = parse_index(empty_df.columns, store_data=True)
60
- params["dtypes"] = empty_df.dtypes
61
- params["shape"] = (df.shape[0], df.shape[1] + 1)
62
- return self.new_dataframe(inputs, **params)
52
+
53
+ if df.ndim == 2:
54
+ empty_obj = build_empty_df(df.dtypes)
55
+ empty_obj.insert(
56
+ loc=self.loc,
57
+ column=self.column,
58
+ allow_duplicates=self.allow_duplicates,
59
+ value=pd.Series([], dtype=value_dtype),
60
+ )
61
+
62
+ params["columns_value"] = parse_index(empty_obj.columns, store_data=True)
63
+ params["dtypes"] = empty_obj.dtypes
64
+ params["shape"] = (df.shape[0], df.shape[1] + 1)
65
+ else:
66
+ assert isinstance(df, INDEX_TYPE)
67
+ params["index_value"] = parse_index(
68
+ df.index_value, type(self), df, self.loc, self.value
69
+ )
70
+ params["shape"] = (df.shape[0] + 1,)
71
+ return self.new_tileable(inputs, **params)
63
72
 
64
73
 
65
74
  def df_insert(df, loc, column, value, allow_duplicates=False):
@@ -88,3 +97,22 @@ def df_insert(df, loc, column, value, allow_duplicates=False):
88
97
  )
89
98
  out_df = op(df)
90
99
  df.data = out_df.data
100
+
101
+
102
+ def index_insert(idx, loc, value):
103
+ """
104
+ Make new Index inserting new item at location.
105
+
106
+ Follows Python list.append semantics for negative values.
107
+
108
+ Parameters
109
+ ----------
110
+ loc : int
111
+ item : object
112
+
113
+ Returns
114
+ -------
115
+ new_index : Index
116
+ """
117
+ op = DataFrameInsert(loc=loc, value=value)
118
+ return op(idx)
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .append import append
16
+ from .combine import DataFrameCombine, df_combine, series_combine
16
17
  from .combine_first import df_combine_first, series_combine_first
17
18
  from .compare import DataFrameCompare, df_compare, series_compare
18
19
  from .concat import DataFrameConcat, concat
@@ -36,11 +37,13 @@ def _install():
36
37
  setattr(cls, "join", join)
37
38
  setattr(cls, "merge", merge)
38
39
  setattr(cls, "update", df_update)
40
+ setattr(cls, "combine", df_combine)
39
41
 
40
42
  for cls in SERIES_TYPE:
41
43
  setattr(cls, "combine_first", series_combine_first)
42
44
  setattr(cls, "compare", series_compare)
43
45
  setattr(cls, "update", series_update)
46
+ setattr(cls, "combine", series_combine)
44
47
 
45
48
  for cls in DATAFRAME_TYPE + SERIES_TYPE:
46
49
  setattr(cls, "append", append)
@@ -0,0 +1,244 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...serialization.serializables import AnyField, BoolField, FunctionField
17
+ from ...udf import BuiltinFunction
18
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
19
+
20
+
21
+ class DataFrameCombine(DataFrameOperator, DataFrameOperatorMixin):
22
+ _op_type_ = opcodes.DATAFRAME_COMBINE
23
+
24
+ func = FunctionField("func")
25
+ fill_value = AnyField("fill_value")
26
+ overwrite = BoolField("overwrite")
27
+
28
+ def has_custom_code(self) -> bool:
29
+ return not isinstance(self.func, BuiltinFunction)
30
+
31
+ def __call__(self, obj1, obj2):
32
+ from ..indexing.align import align
33
+
34
+ assert obj1.ndim == 1 and obj2.ndim == 1
35
+ obj1, obj2 = align(obj1, obj2)
36
+ # Create the output series based on the result series
37
+ return self.new_series(
38
+ [obj1, obj2],
39
+ shape=obj1.shape,
40
+ dtype=obj1.dtype,
41
+ index_value=obj1.index_value,
42
+ name=obj1.name,
43
+ )
44
+
45
+
46
+ def df_combine(df, other, func, fill_value=None, overwrite=True):
47
+ """
48
+ Perform column-wise combine with another DataFrame.
49
+
50
+ Combines a DataFrame with `other` DataFrame using `func`
51
+ to element-wise combine columns. The row and column indexes of the
52
+ resulting DataFrame will be the union of the two.
53
+
54
+ Parameters
55
+ ----------
56
+ other : DataFrame
57
+ The DataFrame to merge column-wise.
58
+ func : function
59
+ Function that takes two series as inputs and return a Series or a
60
+ scalar. Used to merge the two dataframes column by columns.
61
+ fill_value : scalar value, default None
62
+ The value to fill NaNs with prior to passing any column to the
63
+ merge func.
64
+ overwrite : bool, default True
65
+ If True, columns in `self` that do not exist in `other` will be
66
+ overwritten with NaNs.
67
+
68
+ Returns
69
+ -------
70
+ DataFrame
71
+ Combination of the provided DataFrames.
72
+
73
+ See Also
74
+ --------
75
+ DataFrame.combine_first : Combine two DataFrame objects and default to
76
+ non-null values in frame calling the method.
77
+
78
+ Examples
79
+ --------
80
+ Combine using a simple function that chooses the smaller column.
81
+
82
+ >>> import maxframe.tensor as mt
83
+ >>> import maxframe.dataframe as md
84
+ >>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
85
+ >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
86
+ >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
87
+ >>> df1.combine(df2, take_smaller).execute()
88
+ A B
89
+ 0 0 3
90
+ 1 0 3
91
+
92
+ Example using a true element-wise combine function.
93
+
94
+ >>> df1 = md.DataFrame({'A': [5, 0], 'B': [2, 4]})
95
+ >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
96
+ >>> df1.combine(df2, mt.minimum).execute()
97
+ A B
98
+ 0 1 2
99
+ 1 0 3
100
+
101
+ Using `fill_value` fills Nones prior to passing the column to the
102
+ merge function.
103
+
104
+ >>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
105
+ >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
106
+ >>> df1.combine(df2, take_smaller, fill_value=-5).execute()
107
+ A B
108
+ 0 0 -5.0
109
+ 1 0 4.0
110
+
111
+ However, if the same element in both dataframes is None, that None
112
+ is preserved
113
+
114
+ >>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
115
+ >>> df2 = md.DataFrame({'A': [1, 1], 'B': [None, 3]})
116
+ >>> df1.combine(df2, take_smaller, fill_value=-5).execute()
117
+ A B
118
+ 0 0 -5.0
119
+ 1 0 3.0
120
+
121
+ Example that demonstrates the use of `overwrite` and behavior when
122
+ the axis differ between the dataframes.
123
+
124
+ >>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
125
+ >>> df2 = md.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])
126
+ >>> df1.combine(df2, take_smaller).execute()
127
+ A B C
128
+ 0 NaN NaN NaN
129
+ 1 NaN 3.0 -10.0
130
+ 2 NaN 3.0 1.0
131
+
132
+ >>> df1.combine(df2, take_smaller, overwrite=False).execute()
133
+ A B C
134
+ 0 0.0 NaN NaN
135
+ 1 0.0 3.0 -10.0
136
+ 2 NaN 3.0 1.0
137
+
138
+ Demonstrating the preference of the passed in dataframe.
139
+
140
+ >>> df2 = md.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2])
141
+ >>> df2.combine(df1, take_smaller).execute()
142
+ A B C
143
+ 0 0.0 NaN NaN
144
+ 1 0.0 3.0 NaN
145
+ 2 NaN 3.0 NaN
146
+
147
+ >>> df2.combine(df1, take_smaller, overwrite=False).execute()
148
+ A B C
149
+ 0 0.0 NaN NaN
150
+ 1 0.0 3.0 1.0
151
+ 2 NaN 3.0 1.0
152
+ """
153
+ # todo merge series logic into whole dataframe to reduce latency
154
+ from ..indexing.align import align
155
+ from .concat import concat
156
+
157
+ src_df_cols = set(df.dtypes.index)
158
+ src_other_cols = set(other.dtypes.index)
159
+
160
+ df, other = align(df, other)
161
+ col_data = []
162
+ for c in df.dtypes.index:
163
+ if c in src_df_cols and c in src_other_cols:
164
+ col_data.append(func(df[c], other[c]))
165
+ elif c in src_other_cols and not overwrite:
166
+ col_data.append(df[c])
167
+ else:
168
+ col_data.append(other[c])
169
+ res = concat(col_data, axis=1)
170
+ if fill_value is not None:
171
+ res = res.fillna(fill_value)
172
+ return res
173
+
174
+
175
+ def series_combine(series, other, func, fill_value=None):
176
+ """
177
+ Combine the Series with a Series or scalar according to `func`.
178
+
179
+ Combine the Series and `other` using `func` to perform elementwise
180
+ selection for combined Series.
181
+ `fill_value` is assumed when value is missing at some index
182
+ from one of the two objects being combined.
183
+
184
+ Parameters
185
+ ----------
186
+ other : Series or scalar
187
+ The value(s) to be combined with the `Series`.
188
+ func : function
189
+ Function that takes two scalars as inputs and returns an element.
190
+ fill_value : scalar, optional
191
+ The value to assume when an index is missing from
192
+ one Series or the other. The default specifies to use the
193
+ appropriate NaN value for the underlying dtype of the Series.
194
+
195
+ Returns
196
+ -------
197
+ Series
198
+ The result of combining the Series with the other object.
199
+
200
+ See Also
201
+ --------
202
+ Series.combine_first : Combine Series values, choosing the calling
203
+ Series' values first.
204
+
205
+ Examples
206
+ --------
207
+ Consider 2 Datasets ``s1`` and ``s2`` containing
208
+ highest clocked speeds of different birds.
209
+
210
+ >>> import maxframe.dataframe as md
211
+ >>> s1 = md.Series({'falcon': 330.0, 'eagle': 160.0})
212
+ >>> s1.execute()
213
+ falcon 330.0
214
+ eagle 160.0
215
+ dtype: float64
216
+ >>> s2 = md.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
217
+ >>> s2.execute()
218
+ falcon 345.0
219
+ eagle 200.0
220
+ duck 30.0
221
+ dtype: float64
222
+
223
+ Now, to combine the two datasets and view the highest speeds
224
+ of the birds across the two datasets
225
+
226
+ >>> s1.combine(s2, max).execute()
227
+ duck NaN
228
+ eagle 200.0
229
+ falcon 345.0
230
+ dtype: float64
231
+
232
+ In the previous example, the resulting value for duck is missing,
233
+ because the maximum of a NaN and a float is a NaN.
234
+ So, in the example, we set ``fill_value=0``,
235
+ so the maximum value returned will be the value from some dataset.
236
+
237
+ >>> s1.combine(s2, max, fill_value=0).execute()
238
+ duck 30.0
239
+ eagle 200.0
240
+ falcon 345.0
241
+ dtype: float64
242
+ """
243
+ op = DataFrameCombine(func=func, fill_value=fill_value, overwrite=True)
244
+ return op(series, other)
@@ -21,12 +21,12 @@ from .check_monotonic import (
21
21
  is_monotonic_decreasing,
22
22
  is_monotonic_increasing,
23
23
  )
24
- from .check_unique import is_unique
24
+ from .check_unique import index_is_unique, series_is_unique
25
25
  from .clip import clip
26
26
  from .cut import cut
27
27
  from .describe import describe
28
28
  from .diff import df_diff, series_diff
29
- from .drop import df_drop, df_pop, index_drop, series_drop
29
+ from .drop import df_drop, df_pop, index_drop, series_drop, series_pop
30
30
  from .drop_duplicates import (
31
31
  df_drop_duplicates,
32
32
  index_drop_duplicates,
@@ -35,12 +35,14 @@ from .drop_duplicates import (
35
35
  from .duplicated import df_duplicated, index_duplicated, series_duplicated
36
36
  from .eval import df_eval, df_query
37
37
  from .explode import df_explode, series_explode
38
+ from .infer_dtypes import convert_dtypes, infer_objects
38
39
  from .isin import df_isin, series_isin
39
40
  from .map import df_map, index_map, series_map
40
41
  from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
41
42
  from .pct_change import pct_change
42
43
  from .qcut import qcut
43
44
  from .rechunk import rechunk
45
+ from .repeat import index_repeat, series_repeat
44
46
  from .select_dtypes import select_dtypes
45
47
  from .shift import shift, tshift
46
48
  from .transform import df_transform, series_transform
@@ -57,6 +59,7 @@ def _install():
57
59
  setattr(t, "applymap", df_map)
58
60
  setattr(t, "astype", astype)
59
61
  setattr(t, "clip", clip)
62
+ setattr(t, "convert_dtypes", convert_dtypes)
60
63
  setattr(t, "describe", describe)
61
64
  setattr(
62
65
  t, "__delitem__", lambda df, items: df_drop(df, items, axis=1, inplace=True)
@@ -68,6 +71,7 @@ def _install():
68
71
  setattr(t, "eval", df_eval)
69
72
  setattr(t, "explode", df_explode)
70
73
  setattr(t, "first_valid_index", first_valid_index)
74
+ setattr(t, "infer_objects", infer_objects)
71
75
  setattr(t, "isin", df_isin)
72
76
  setattr(t, "last_valid_index", last_valid_index)
73
77
  setattr(t, "map", df_map)
@@ -89,6 +93,7 @@ def _install():
89
93
  setattr(t, "case_when", case_when)
90
94
  setattr(t, "check_monotonic", check_monotonic)
91
95
  setattr(t, "clip", clip)
96
+ setattr(t, "convert_dtypes", convert_dtypes)
92
97
  setattr(t, "describe", describe)
93
98
  setattr(t, "diff", series_diff)
94
99
  setattr(t, "drop", series_drop)
@@ -96,16 +101,19 @@ def _install():
96
101
  setattr(t, "duplicated", series_duplicated)
97
102
  setattr(t, "explode", series_explode)
98
103
  setattr(t, "first_valid_index", first_valid_index)
104
+ setattr(t, "infer_objects", infer_objects)
99
105
  setattr(t, "is_monotonic", property(fget=is_monotonic))
100
106
  setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
101
107
  setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
102
108
  setattr(t, "isin", series_isin)
103
- setattr(t, "is_unique", property(fget=is_unique))
109
+ setattr(t, "is_unique", property(fget=series_is_unique))
104
110
  setattr(t, "last_valid_index", last_valid_index)
105
111
  setattr(t, "map", series_map)
106
112
  setattr(t, "memory_usage", series_memory_usage)
107
113
  setattr(t, "pct_change", pct_change)
114
+ setattr(t, "pop", series_pop)
108
115
  setattr(t, "rechunk", rechunk)
116
+ setattr(t, "repeat", series_repeat)
109
117
  setattr(t, "shift", shift)
110
118
  setattr(t, "transform", series_transform)
111
119
  setattr(t, "tshift", tshift)
@@ -118,12 +126,15 @@ def _install():
118
126
  setattr(t, "drop", index_drop)
119
127
  setattr(t, "drop_duplicates", index_drop_duplicates)
120
128
  setattr(t, "duplicated", index_duplicated)
129
+ setattr(t, "has_duplicates", property(fget=lambda x: not index_is_unique(x)))
121
130
  setattr(t, "is_monotonic", property(fget=is_monotonic))
122
131
  setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
123
132
  setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
133
+ setattr(t, "is_unique", property(fget=index_is_unique))
124
134
  setattr(t, "map", index_map)
125
135
  setattr(t, "memory_usage", index_memory_usage)
126
136
  setattr(t, "rechunk", rechunk)
137
+ setattr(t, "repeat", index_repeat)
127
138
  setattr(t, "value_counts", value_counts)
128
139
 
129
140
 
@@ -18,13 +18,27 @@ from ...udf import builtin_function
18
18
 
19
19
 
20
20
  @builtin_function
21
- def _tailor_unique(series):
22
- if not series.is_unique:
23
- return pd.Series([], name=series.name, dtype=series.dtype)
24
- return series
21
+ def _tailor_unique(series_or_idx):
22
+ if not series_or_idx.is_unique:
23
+ if isinstance(series_or_idx, pd.Series):
24
+ return series_or_idx.iloc[:0]
25
+ else:
26
+ return series_or_idx[:0]
27
+ return series_or_idx
25
28
 
26
29
 
27
- def is_unique(series):
30
+ def _is_unique(series_or_index):
31
+ from ... import tensor as mt
32
+
33
+ return mt.equal(
34
+ series_or_index.mf.apply_chunk(
35
+ _tailor_unique, dtype=series_or_index.dtype
36
+ ).nunique(),
37
+ mt.shape(series_or_index)[0],
38
+ )
39
+
40
+
41
+ def series_is_unique(series):
28
42
  """
29
43
  Return boolean if values in the object are unique.
30
44
 
@@ -43,9 +57,26 @@ def is_unique(series):
43
57
  >>> s.is_unique.execute()
44
58
  False
45
59
  """
46
- from ... import tensor as mt
60
+ return _is_unique(series)
47
61
 
48
- return mt.equal(
49
- series.mf.apply_chunk(_tailor_unique, dtype=series.dtype).nunique(),
50
- mt.shape(series)[0],
51
- )
62
+
63
+ def index_is_unique(index):
64
+ """
65
+ Return boolean if values in the index are unique.
66
+
67
+ Returns
68
+ -------
69
+ bool
70
+
71
+ Examples
72
+ --------
73
+ >>> import maxframe.dataframe as md
74
+ >>> index = md.Index([1, 2, 3])
75
+ >>> index.is_unique.execute()
76
+ True
77
+
78
+ >>> index = md.Index([1, 2, 3, 1])
79
+ >>> index.is_unique.execute()
80
+ False
81
+ """
82
+ return index.to_series().is_unique
@@ -419,6 +419,37 @@ def series_drop(
419
419
  )
420
420
 
421
421
 
422
+ def series_pop(series, item):
423
+ """
424
+ Return item and drops from series. Raise KeyError if not found.
425
+
426
+ Parameters
427
+ ----------
428
+ item : label
429
+ Index of the element that needs to be removed.
430
+
431
+ Returns
432
+ -------
433
+ Value that is popped from series.
434
+
435
+ Examples
436
+ --------
437
+ >>> import maxframe.dataframe as md
438
+ >>> ser = md.Series([1,2,3])
439
+
440
+ >>> ser.pop(0).execute()
441
+ 1
442
+
443
+ >>> ser.execute()
444
+ 1 2
445
+ 2 3
446
+ dtype: int64
447
+ """
448
+ scalar = series.data[item]
449
+ series_drop(series, item, inplace=True)
450
+ return scalar
451
+
452
+
422
453
  def index_drop(index, labels, errors="raise"):
423
454
  """
424
455
  Make new Index with passed list of labels deleted.