maxframe 2.2.0__cp39-cp39-win_amd64.whl → 2.3.0rc1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp39-win_amd64.pyd +0 -0
- maxframe/codegen/core.py +3 -2
- maxframe/codegen/spe/dataframe/merge.py +4 -0
- maxframe/codegen/spe/dataframe/misc.py +2 -0
- maxframe/codegen/spe/dataframe/reduction.py +18 -0
- maxframe/codegen/spe/dataframe/sort.py +9 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/config/config.py +3 -0
- maxframe/conftest.py +10 -0
- maxframe/core/base.py +2 -1
- maxframe/core/entity/tileables.py +2 -0
- maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +7 -1
- maxframe/core/mode.py +6 -1
- maxframe/dataframe/__init__.py +2 -2
- maxframe/dataframe/arithmetic/__init__.py +4 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/core.py +98 -106
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/read_csv.py +19 -11
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datastore/__init__.py +6 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_odps.py +6 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/groupby/__init__.py +5 -1
- maxframe/dataframe/groupby/aggregation.py +10 -6
- maxframe/dataframe/groupby/apply_chunk.py +1 -3
- maxframe/dataframe/groupby/core.py +20 -4
- maxframe/dataframe/indexing/__init__.py +2 -1
- maxframe/dataframe/indexing/insert.py +45 -17
- maxframe/dataframe/merge/__init__.py +3 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/misc/__init__.py +14 -3
- maxframe/dataframe/misc/check_unique.py +41 -10
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/map.py +31 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +35 -1
- maxframe/dataframe/missing/checkna.py +3 -2
- maxframe/dataframe/reduction/__init__.py +10 -5
- maxframe/dataframe/reduction/aggregation.py +6 -6
- maxframe/dataframe/reduction/argmax.py +7 -4
- maxframe/dataframe/reduction/argmin.py +7 -4
- maxframe/dataframe/reduction/core.py +18 -9
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +10 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/sort/__init__.py +9 -2
- maxframe/dataframe/sort/argsort.py +7 -1
- maxframe/dataframe/sort/core.py +1 -1
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/utils.py +30 -26
- maxframe/learn/contrib/llm/core.py +16 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +31 -7
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +2 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +1 -0
- maxframe/learn/utils/extmath.py +42 -9
- maxframe/learn/utils/odpsio.py +80 -11
- maxframe/lib/filesystem/_oss_lib/common.py +2 -0
- maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
- maxframe/opcodes.py +9 -1
- maxframe/remote/core.py +4 -0
- maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
- maxframe/serialization/tests/test_serial.py +2 -2
- maxframe/tensor/arithmetic/__init__.py +1 -1
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
- maxframe/tensor/core.py +3 -0
- maxframe/tensor/misc/copyto.py +1 -1
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +8 -5
- maxframe/udf.py +103 -7
- maxframe/utils.py +61 -8
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_session.py +24 -0
- maxframe/dataframe/arrays.py +0 -864
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -17,10 +17,10 @@ from typing import List
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
|
|
19
19
|
from ... import opcodes
|
|
20
|
-
from ...core import EntityData
|
|
20
|
+
from ...core import EntityData, get_output_types
|
|
21
21
|
from ...serialization.serializables import AnyField, BoolField, Int64Field
|
|
22
22
|
from ...tensor.core import TENSOR_TYPE
|
|
23
|
-
from ..core import SERIES_TYPE
|
|
23
|
+
from ..core import INDEX_TYPE, SERIES_TYPE
|
|
24
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
25
|
from ..utils import build_empty_df, parse_index
|
|
26
26
|
|
|
@@ -29,9 +29,9 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
29
29
|
_op_type_ = opcodes.INSERT
|
|
30
30
|
|
|
31
31
|
loc = Int64Field("loc")
|
|
32
|
-
column = AnyField("column")
|
|
33
|
-
value = AnyField("value")
|
|
34
|
-
allow_duplicates = BoolField("allow_duplicates")
|
|
32
|
+
column = AnyField("column", default=None)
|
|
33
|
+
value = AnyField("value", default=None)
|
|
34
|
+
allow_duplicates = BoolField("allow_duplicates", default=False)
|
|
35
35
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def _set_inputs(cls, op: "DataFrameInsert", inputs: List[EntityData]):
|
|
@@ -40,6 +40,7 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
40
40
|
op.value = op._inputs[-1]
|
|
41
41
|
|
|
42
42
|
def __call__(self, df):
|
|
43
|
+
self._output_types = get_output_types(df)
|
|
43
44
|
inputs = [df]
|
|
44
45
|
if isinstance(self.value, (SERIES_TYPE, TENSOR_TYPE)):
|
|
45
46
|
value_dtype = self.value.dtype
|
|
@@ -47,19 +48,27 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
47
48
|
else:
|
|
48
49
|
value_dtype = pd.Series(self.value).dtype
|
|
49
50
|
|
|
50
|
-
empty_df = build_empty_df(df.dtypes)
|
|
51
|
-
empty_df.insert(
|
|
52
|
-
loc=self.loc,
|
|
53
|
-
column=self.column,
|
|
54
|
-
allow_duplicates=self.allow_duplicates,
|
|
55
|
-
value=pd.Series([], dtype=value_dtype),
|
|
56
|
-
)
|
|
57
|
-
|
|
58
51
|
params = df.params
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
52
|
+
|
|
53
|
+
if df.ndim == 2:
|
|
54
|
+
empty_obj = build_empty_df(df.dtypes)
|
|
55
|
+
empty_obj.insert(
|
|
56
|
+
loc=self.loc,
|
|
57
|
+
column=self.column,
|
|
58
|
+
allow_duplicates=self.allow_duplicates,
|
|
59
|
+
value=pd.Series([], dtype=value_dtype),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
params["columns_value"] = parse_index(empty_obj.columns, store_data=True)
|
|
63
|
+
params["dtypes"] = empty_obj.dtypes
|
|
64
|
+
params["shape"] = (df.shape[0], df.shape[1] + 1)
|
|
65
|
+
else:
|
|
66
|
+
assert isinstance(df, INDEX_TYPE)
|
|
67
|
+
params["index_value"] = parse_index(
|
|
68
|
+
df.index_value, type(self), df, self.loc, self.value
|
|
69
|
+
)
|
|
70
|
+
params["shape"] = (df.shape[0] + 1,)
|
|
71
|
+
return self.new_tileable(inputs, **params)
|
|
63
72
|
|
|
64
73
|
|
|
65
74
|
def df_insert(df, loc, column, value, allow_duplicates=False):
|
|
@@ -88,3 +97,22 @@ def df_insert(df, loc, column, value, allow_duplicates=False):
|
|
|
88
97
|
)
|
|
89
98
|
out_df = op(df)
|
|
90
99
|
df.data = out_df.data
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def index_insert(idx, loc, value):
|
|
103
|
+
"""
|
|
104
|
+
Make new Index inserting new item at location.
|
|
105
|
+
|
|
106
|
+
Follows Python list.append semantics for negative values.
|
|
107
|
+
|
|
108
|
+
Parameters
|
|
109
|
+
----------
|
|
110
|
+
loc : int
|
|
111
|
+
item : object
|
|
112
|
+
|
|
113
|
+
Returns
|
|
114
|
+
-------
|
|
115
|
+
new_index : Index
|
|
116
|
+
"""
|
|
117
|
+
op = DataFrameInsert(loc=loc, value=value)
|
|
118
|
+
return op(idx)
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from .append import append
|
|
16
|
+
from .combine import DataFrameCombine, df_combine, series_combine
|
|
16
17
|
from .combine_first import df_combine_first, series_combine_first
|
|
17
18
|
from .compare import DataFrameCompare, df_compare, series_compare
|
|
18
19
|
from .concat import DataFrameConcat, concat
|
|
@@ -36,11 +37,13 @@ def _install():
|
|
|
36
37
|
setattr(cls, "join", join)
|
|
37
38
|
setattr(cls, "merge", merge)
|
|
38
39
|
setattr(cls, "update", df_update)
|
|
40
|
+
setattr(cls, "combine", df_combine)
|
|
39
41
|
|
|
40
42
|
for cls in SERIES_TYPE:
|
|
41
43
|
setattr(cls, "combine_first", series_combine_first)
|
|
42
44
|
setattr(cls, "compare", series_compare)
|
|
43
45
|
setattr(cls, "update", series_update)
|
|
46
|
+
setattr(cls, "combine", series_combine)
|
|
44
47
|
|
|
45
48
|
for cls in DATAFRAME_TYPE + SERIES_TYPE:
|
|
46
49
|
setattr(cls, "append", append)
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, BoolField, FunctionField
|
|
17
|
+
from ...udf import BuiltinFunction
|
|
18
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameCombine(DataFrameOperator, DataFrameOperatorMixin):
|
|
22
|
+
_op_type_ = opcodes.DATAFRAME_COMBINE
|
|
23
|
+
|
|
24
|
+
func = FunctionField("func")
|
|
25
|
+
fill_value = AnyField("fill_value")
|
|
26
|
+
overwrite = BoolField("overwrite")
|
|
27
|
+
|
|
28
|
+
def has_custom_code(self) -> bool:
|
|
29
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
30
|
+
|
|
31
|
+
def __call__(self, obj1, obj2):
|
|
32
|
+
from ..indexing.align import align
|
|
33
|
+
|
|
34
|
+
assert obj1.ndim == 1 and obj2.ndim == 1
|
|
35
|
+
obj1, obj2 = align(obj1, obj2)
|
|
36
|
+
# Create the output series based on the result series
|
|
37
|
+
return self.new_series(
|
|
38
|
+
[obj1, obj2],
|
|
39
|
+
shape=obj1.shape,
|
|
40
|
+
dtype=obj1.dtype,
|
|
41
|
+
index_value=obj1.index_value,
|
|
42
|
+
name=obj1.name,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def df_combine(df, other, func, fill_value=None, overwrite=True):
|
|
47
|
+
"""
|
|
48
|
+
Perform column-wise combine with another DataFrame.
|
|
49
|
+
|
|
50
|
+
Combines a DataFrame with `other` DataFrame using `func`
|
|
51
|
+
to element-wise combine columns. The row and column indexes of the
|
|
52
|
+
resulting DataFrame will be the union of the two.
|
|
53
|
+
|
|
54
|
+
Parameters
|
|
55
|
+
----------
|
|
56
|
+
other : DataFrame
|
|
57
|
+
The DataFrame to merge column-wise.
|
|
58
|
+
func : function
|
|
59
|
+
Function that takes two series as inputs and return a Series or a
|
|
60
|
+
scalar. Used to merge the two dataframes column by columns.
|
|
61
|
+
fill_value : scalar value, default None
|
|
62
|
+
The value to fill NaNs with prior to passing any column to the
|
|
63
|
+
merge func.
|
|
64
|
+
overwrite : bool, default True
|
|
65
|
+
If True, columns in `self` that do not exist in `other` will be
|
|
66
|
+
overwritten with NaNs.
|
|
67
|
+
|
|
68
|
+
Returns
|
|
69
|
+
-------
|
|
70
|
+
DataFrame
|
|
71
|
+
Combination of the provided DataFrames.
|
|
72
|
+
|
|
73
|
+
See Also
|
|
74
|
+
--------
|
|
75
|
+
DataFrame.combine_first : Combine two DataFrame objects and default to
|
|
76
|
+
non-null values in frame calling the method.
|
|
77
|
+
|
|
78
|
+
Examples
|
|
79
|
+
--------
|
|
80
|
+
Combine using a simple function that chooses the smaller column.
|
|
81
|
+
|
|
82
|
+
>>> import maxframe.tensor as mt
|
|
83
|
+
>>> import maxframe.dataframe as md
|
|
84
|
+
>>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
|
|
85
|
+
>>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
|
|
86
|
+
>>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
|
|
87
|
+
>>> df1.combine(df2, take_smaller).execute()
|
|
88
|
+
A B
|
|
89
|
+
0 0 3
|
|
90
|
+
1 0 3
|
|
91
|
+
|
|
92
|
+
Example using a true element-wise combine function.
|
|
93
|
+
|
|
94
|
+
>>> df1 = md.DataFrame({'A': [5, 0], 'B': [2, 4]})
|
|
95
|
+
>>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
|
|
96
|
+
>>> df1.combine(df2, mt.minimum).execute()
|
|
97
|
+
A B
|
|
98
|
+
0 1 2
|
|
99
|
+
1 0 3
|
|
100
|
+
|
|
101
|
+
Using `fill_value` fills Nones prior to passing the column to the
|
|
102
|
+
merge function.
|
|
103
|
+
|
|
104
|
+
>>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
|
|
105
|
+
>>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
|
|
106
|
+
>>> df1.combine(df2, take_smaller, fill_value=-5).execute()
|
|
107
|
+
A B
|
|
108
|
+
0 0 -5.0
|
|
109
|
+
1 0 4.0
|
|
110
|
+
|
|
111
|
+
However, if the same element in both dataframes is None, that None
|
|
112
|
+
is preserved
|
|
113
|
+
|
|
114
|
+
>>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
|
|
115
|
+
>>> df2 = md.DataFrame({'A': [1, 1], 'B': [None, 3]})
|
|
116
|
+
>>> df1.combine(df2, take_smaller, fill_value=-5).execute()
|
|
117
|
+
A B
|
|
118
|
+
0 0 -5.0
|
|
119
|
+
1 0 3.0
|
|
120
|
+
|
|
121
|
+
Example that demonstrates the use of `overwrite` and behavior when
|
|
122
|
+
the axis differ between the dataframes.
|
|
123
|
+
|
|
124
|
+
>>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
|
|
125
|
+
>>> df2 = md.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])
|
|
126
|
+
>>> df1.combine(df2, take_smaller).execute()
|
|
127
|
+
A B C
|
|
128
|
+
0 NaN NaN NaN
|
|
129
|
+
1 NaN 3.0 -10.0
|
|
130
|
+
2 NaN 3.0 1.0
|
|
131
|
+
|
|
132
|
+
>>> df1.combine(df2, take_smaller, overwrite=False).execute()
|
|
133
|
+
A B C
|
|
134
|
+
0 0.0 NaN NaN
|
|
135
|
+
1 0.0 3.0 -10.0
|
|
136
|
+
2 NaN 3.0 1.0
|
|
137
|
+
|
|
138
|
+
Demonstrating the preference of the passed in dataframe.
|
|
139
|
+
|
|
140
|
+
>>> df2 = md.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2])
|
|
141
|
+
>>> df2.combine(df1, take_smaller).execute()
|
|
142
|
+
A B C
|
|
143
|
+
0 0.0 NaN NaN
|
|
144
|
+
1 0.0 3.0 NaN
|
|
145
|
+
2 NaN 3.0 NaN
|
|
146
|
+
|
|
147
|
+
>>> df2.combine(df1, take_smaller, overwrite=False).execute()
|
|
148
|
+
A B C
|
|
149
|
+
0 0.0 NaN NaN
|
|
150
|
+
1 0.0 3.0 1.0
|
|
151
|
+
2 NaN 3.0 1.0
|
|
152
|
+
"""
|
|
153
|
+
# todo merge series logic into whole dataframe to reduce latency
|
|
154
|
+
from ..indexing.align import align
|
|
155
|
+
from .concat import concat
|
|
156
|
+
|
|
157
|
+
src_df_cols = set(df.dtypes.index)
|
|
158
|
+
src_other_cols = set(other.dtypes.index)
|
|
159
|
+
|
|
160
|
+
df, other = align(df, other)
|
|
161
|
+
col_data = []
|
|
162
|
+
for c in df.dtypes.index:
|
|
163
|
+
if c in src_df_cols and c in src_other_cols:
|
|
164
|
+
col_data.append(func(df[c], other[c]))
|
|
165
|
+
elif c in src_other_cols and not overwrite:
|
|
166
|
+
col_data.append(df[c])
|
|
167
|
+
else:
|
|
168
|
+
col_data.append(other[c])
|
|
169
|
+
res = concat(col_data, axis=1)
|
|
170
|
+
if fill_value is not None:
|
|
171
|
+
res = res.fillna(fill_value)
|
|
172
|
+
return res
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def series_combine(series, other, func, fill_value=None):
|
|
176
|
+
"""
|
|
177
|
+
Combine the Series with a Series or scalar according to `func`.
|
|
178
|
+
|
|
179
|
+
Combine the Series and `other` using `func` to perform elementwise
|
|
180
|
+
selection for combined Series.
|
|
181
|
+
`fill_value` is assumed when value is missing at some index
|
|
182
|
+
from one of the two objects being combined.
|
|
183
|
+
|
|
184
|
+
Parameters
|
|
185
|
+
----------
|
|
186
|
+
other : Series or scalar
|
|
187
|
+
The value(s) to be combined with the `Series`.
|
|
188
|
+
func : function
|
|
189
|
+
Function that takes two scalars as inputs and returns an element.
|
|
190
|
+
fill_value : scalar, optional
|
|
191
|
+
The value to assume when an index is missing from
|
|
192
|
+
one Series or the other. The default specifies to use the
|
|
193
|
+
appropriate NaN value for the underlying dtype of the Series.
|
|
194
|
+
|
|
195
|
+
Returns
|
|
196
|
+
-------
|
|
197
|
+
Series
|
|
198
|
+
The result of combining the Series with the other object.
|
|
199
|
+
|
|
200
|
+
See Also
|
|
201
|
+
--------
|
|
202
|
+
Series.combine_first : Combine Series values, choosing the calling
|
|
203
|
+
Series' values first.
|
|
204
|
+
|
|
205
|
+
Examples
|
|
206
|
+
--------
|
|
207
|
+
Consider 2 Datasets ``s1`` and ``s2`` containing
|
|
208
|
+
highest clocked speeds of different birds.
|
|
209
|
+
|
|
210
|
+
>>> import maxframe.dataframe as md
|
|
211
|
+
>>> s1 = md.Series({'falcon': 330.0, 'eagle': 160.0})
|
|
212
|
+
>>> s1.execute()
|
|
213
|
+
falcon 330.0
|
|
214
|
+
eagle 160.0
|
|
215
|
+
dtype: float64
|
|
216
|
+
>>> s2 = md.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
|
|
217
|
+
>>> s2.execute()
|
|
218
|
+
falcon 345.0
|
|
219
|
+
eagle 200.0
|
|
220
|
+
duck 30.0
|
|
221
|
+
dtype: float64
|
|
222
|
+
|
|
223
|
+
Now, to combine the two datasets and view the highest speeds
|
|
224
|
+
of the birds across the two datasets
|
|
225
|
+
|
|
226
|
+
>>> s1.combine(s2, max).execute()
|
|
227
|
+
duck NaN
|
|
228
|
+
eagle 200.0
|
|
229
|
+
falcon 345.0
|
|
230
|
+
dtype: float64
|
|
231
|
+
|
|
232
|
+
In the previous example, the resulting value for duck is missing,
|
|
233
|
+
because the maximum of a NaN and a float is a NaN.
|
|
234
|
+
So, in the example, we set ``fill_value=0``,
|
|
235
|
+
so the maximum value returned will be the value from some dataset.
|
|
236
|
+
|
|
237
|
+
>>> s1.combine(s2, max, fill_value=0).execute()
|
|
238
|
+
duck 30.0
|
|
239
|
+
eagle 200.0
|
|
240
|
+
falcon 345.0
|
|
241
|
+
dtype: float64
|
|
242
|
+
"""
|
|
243
|
+
op = DataFrameCombine(func=func, fill_value=fill_value, overwrite=True)
|
|
244
|
+
return op(series, other)
|
|
@@ -21,12 +21,12 @@ from .check_monotonic import (
|
|
|
21
21
|
is_monotonic_decreasing,
|
|
22
22
|
is_monotonic_increasing,
|
|
23
23
|
)
|
|
24
|
-
from .check_unique import is_unique
|
|
24
|
+
from .check_unique import index_is_unique, series_is_unique
|
|
25
25
|
from .clip import clip
|
|
26
26
|
from .cut import cut
|
|
27
27
|
from .describe import describe
|
|
28
28
|
from .diff import df_diff, series_diff
|
|
29
|
-
from .drop import df_drop, df_pop, index_drop, series_drop
|
|
29
|
+
from .drop import df_drop, df_pop, index_drop, series_drop, series_pop
|
|
30
30
|
from .drop_duplicates import (
|
|
31
31
|
df_drop_duplicates,
|
|
32
32
|
index_drop_duplicates,
|
|
@@ -35,12 +35,14 @@ from .drop_duplicates import (
|
|
|
35
35
|
from .duplicated import df_duplicated, index_duplicated, series_duplicated
|
|
36
36
|
from .eval import df_eval, df_query
|
|
37
37
|
from .explode import df_explode, series_explode
|
|
38
|
+
from .infer_dtypes import convert_dtypes, infer_objects
|
|
38
39
|
from .isin import df_isin, series_isin
|
|
39
40
|
from .map import df_map, index_map, series_map
|
|
40
41
|
from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
|
|
41
42
|
from .pct_change import pct_change
|
|
42
43
|
from .qcut import qcut
|
|
43
44
|
from .rechunk import rechunk
|
|
45
|
+
from .repeat import index_repeat, series_repeat
|
|
44
46
|
from .select_dtypes import select_dtypes
|
|
45
47
|
from .shift import shift, tshift
|
|
46
48
|
from .transform import df_transform, series_transform
|
|
@@ -57,6 +59,7 @@ def _install():
|
|
|
57
59
|
setattr(t, "applymap", df_map)
|
|
58
60
|
setattr(t, "astype", astype)
|
|
59
61
|
setattr(t, "clip", clip)
|
|
62
|
+
setattr(t, "convert_dtypes", convert_dtypes)
|
|
60
63
|
setattr(t, "describe", describe)
|
|
61
64
|
setattr(
|
|
62
65
|
t, "__delitem__", lambda df, items: df_drop(df, items, axis=1, inplace=True)
|
|
@@ -68,6 +71,7 @@ def _install():
|
|
|
68
71
|
setattr(t, "eval", df_eval)
|
|
69
72
|
setattr(t, "explode", df_explode)
|
|
70
73
|
setattr(t, "first_valid_index", first_valid_index)
|
|
74
|
+
setattr(t, "infer_objects", infer_objects)
|
|
71
75
|
setattr(t, "isin", df_isin)
|
|
72
76
|
setattr(t, "last_valid_index", last_valid_index)
|
|
73
77
|
setattr(t, "map", df_map)
|
|
@@ -89,6 +93,7 @@ def _install():
|
|
|
89
93
|
setattr(t, "case_when", case_when)
|
|
90
94
|
setattr(t, "check_monotonic", check_monotonic)
|
|
91
95
|
setattr(t, "clip", clip)
|
|
96
|
+
setattr(t, "convert_dtypes", convert_dtypes)
|
|
92
97
|
setattr(t, "describe", describe)
|
|
93
98
|
setattr(t, "diff", series_diff)
|
|
94
99
|
setattr(t, "drop", series_drop)
|
|
@@ -96,16 +101,19 @@ def _install():
|
|
|
96
101
|
setattr(t, "duplicated", series_duplicated)
|
|
97
102
|
setattr(t, "explode", series_explode)
|
|
98
103
|
setattr(t, "first_valid_index", first_valid_index)
|
|
104
|
+
setattr(t, "infer_objects", infer_objects)
|
|
99
105
|
setattr(t, "is_monotonic", property(fget=is_monotonic))
|
|
100
106
|
setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
|
|
101
107
|
setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
|
|
102
108
|
setattr(t, "isin", series_isin)
|
|
103
|
-
setattr(t, "is_unique", property(fget=is_unique))
|
|
109
|
+
setattr(t, "is_unique", property(fget=series_is_unique))
|
|
104
110
|
setattr(t, "last_valid_index", last_valid_index)
|
|
105
111
|
setattr(t, "map", series_map)
|
|
106
112
|
setattr(t, "memory_usage", series_memory_usage)
|
|
107
113
|
setattr(t, "pct_change", pct_change)
|
|
114
|
+
setattr(t, "pop", series_pop)
|
|
108
115
|
setattr(t, "rechunk", rechunk)
|
|
116
|
+
setattr(t, "repeat", series_repeat)
|
|
109
117
|
setattr(t, "shift", shift)
|
|
110
118
|
setattr(t, "transform", series_transform)
|
|
111
119
|
setattr(t, "tshift", tshift)
|
|
@@ -118,12 +126,15 @@ def _install():
|
|
|
118
126
|
setattr(t, "drop", index_drop)
|
|
119
127
|
setattr(t, "drop_duplicates", index_drop_duplicates)
|
|
120
128
|
setattr(t, "duplicated", index_duplicated)
|
|
129
|
+
setattr(t, "has_duplicates", property(fget=lambda x: not index_is_unique(x)))
|
|
121
130
|
setattr(t, "is_monotonic", property(fget=is_monotonic))
|
|
122
131
|
setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
|
|
123
132
|
setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
|
|
133
|
+
setattr(t, "is_unique", property(fget=index_is_unique))
|
|
124
134
|
setattr(t, "map", index_map)
|
|
125
135
|
setattr(t, "memory_usage", index_memory_usage)
|
|
126
136
|
setattr(t, "rechunk", rechunk)
|
|
137
|
+
setattr(t, "repeat", index_repeat)
|
|
127
138
|
setattr(t, "value_counts", value_counts)
|
|
128
139
|
|
|
129
140
|
|
|
@@ -18,13 +18,27 @@ from ...udf import builtin_function
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@builtin_function
|
|
21
|
-
def _tailor_unique(
|
|
22
|
-
if not
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
def _tailor_unique(series_or_idx):
|
|
22
|
+
if not series_or_idx.is_unique:
|
|
23
|
+
if isinstance(series_or_idx, pd.Series):
|
|
24
|
+
return series_or_idx.iloc[:0]
|
|
25
|
+
else:
|
|
26
|
+
return series_or_idx[:0]
|
|
27
|
+
return series_or_idx
|
|
25
28
|
|
|
26
29
|
|
|
27
|
-
def is_unique(series):
|
|
30
|
+
def _is_unique(series_or_index):
|
|
31
|
+
from ... import tensor as mt
|
|
32
|
+
|
|
33
|
+
return mt.equal(
|
|
34
|
+
series_or_index.mf.apply_chunk(
|
|
35
|
+
_tailor_unique, dtype=series_or_index.dtype
|
|
36
|
+
).nunique(),
|
|
37
|
+
mt.shape(series_or_index)[0],
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def series_is_unique(series):
|
|
28
42
|
"""
|
|
29
43
|
Return boolean if values in the object are unique.
|
|
30
44
|
|
|
@@ -43,9 +57,26 @@ def is_unique(series):
|
|
|
43
57
|
>>> s.is_unique.execute()
|
|
44
58
|
False
|
|
45
59
|
"""
|
|
46
|
-
|
|
60
|
+
return _is_unique(series)
|
|
47
61
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
62
|
+
|
|
63
|
+
def index_is_unique(index):
|
|
64
|
+
"""
|
|
65
|
+
Return boolean if values in the index are unique.
|
|
66
|
+
|
|
67
|
+
Returns
|
|
68
|
+
-------
|
|
69
|
+
bool
|
|
70
|
+
|
|
71
|
+
Examples
|
|
72
|
+
--------
|
|
73
|
+
>>> import maxframe.dataframe as md
|
|
74
|
+
>>> index = md.Index([1, 2, 3])
|
|
75
|
+
>>> index.is_unique.execute()
|
|
76
|
+
True
|
|
77
|
+
|
|
78
|
+
>>> index = md.Index([1, 2, 3, 1])
|
|
79
|
+
>>> index.is_unique.execute()
|
|
80
|
+
False
|
|
81
|
+
"""
|
|
82
|
+
return index.to_series().is_unique
|
maxframe/dataframe/misc/drop.py
CHANGED
|
@@ -419,6 +419,37 @@ def series_drop(
|
|
|
419
419
|
)
|
|
420
420
|
|
|
421
421
|
|
|
422
|
+
def series_pop(series, item):
|
|
423
|
+
"""
|
|
424
|
+
Return item and drops from series. Raise KeyError if not found.
|
|
425
|
+
|
|
426
|
+
Parameters
|
|
427
|
+
----------
|
|
428
|
+
item : label
|
|
429
|
+
Index of the element that needs to be removed.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
Value that is popped from series.
|
|
434
|
+
|
|
435
|
+
Examples
|
|
436
|
+
--------
|
|
437
|
+
>>> import maxframe.dataframe as md
|
|
438
|
+
>>> ser = md.Series([1,2,3])
|
|
439
|
+
|
|
440
|
+
>>> ser.pop(0).execute()
|
|
441
|
+
1
|
|
442
|
+
|
|
443
|
+
>>> ser.execute()
|
|
444
|
+
1 2
|
|
445
|
+
2 3
|
|
446
|
+
dtype: int64
|
|
447
|
+
"""
|
|
448
|
+
scalar = series.data[item]
|
|
449
|
+
series_drop(series, item, inplace=True)
|
|
450
|
+
return scalar
|
|
451
|
+
|
|
452
|
+
|
|
422
453
|
def index_drop(index, labels, errors="raise"):
|
|
423
454
|
"""
|
|
424
455
|
Make new Index with passed list of labels deleted.
|