maxframe 0.1.0b5__cp37-cp37m-win_amd64.whl → 1.0.0__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp37-win_amd64.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +3 -3
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ...serialization.serializables import ListField
|
|
20
|
+
from ...serialization.serializables.field_type import FieldTypes
|
|
21
|
+
from ..core import DataFrame
|
|
22
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
23
|
+
from ..utils import make_dtypes, parse_index
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
    """Operator extracting values from JSON strings held in a series.

    Depending on the configured output type the result is either a single
    series (exactly one query path) or a dataframe with one column per
    query path.
    """

    _op_type_ = opcodes.FLATJSON

    # JSON query paths (RFC 9535 style), one per produced column.
    query_paths = ListField("query_paths", field_type=FieldTypes.string, default=None)

    def __call__(self, series, dtypes):
        if self._output_types[0] != OutputType.series:
            # DataFrame output: ``dtypes`` is a pandas Series mapping
            # column names to dtypes.
            return self.new_dataframe(
                [series],
                shape=(series.shape[0], len(dtypes)),
                index_value=series.index_value,
                columns_value=parse_index(dtypes.index, store_data=True),
                dtypes=make_dtypes(dtypes),
            )
        # Series output: ``dtypes`` carries a ``(name, dtype)`` pair.
        out_name, out_dtype = dtypes
        return self.new_series(
            [series],
            shape=series.shape,
            index_value=series.index_value,
            name=out_name,
            dtype=out_dtype,
        )
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def series_flatjson(
    series,
    query_paths: List[str],
    dtypes=None,
    dtype=None,
    name: str = None,
) -> DataFrame:
    """
    Flat JSON object in the series to a dataframe according to JSON query.

    Parameters
    ----------
    series : Series
        The series of json strings.

    query_paths: List[str] or str
        The JSON query paths for each generated column. The path format should follow
        [RFC9535](https://datatracker.ietf.org/doc/rfc9535/).

    dtypes : Series, default None
        Specify dtypes of returned DataFrame. Can't work with dtype.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. Can't work with dtypes.

    name : str, default None
        Specify name of the returned Series.

    Returns
    -------
    DataFrame or Series
        Result of DataFrame when dtypes specified, else Series.

    Raises
    ------
    ValueError
        If both ``dtypes`` and ``dtype`` are given, if neither is given, or
        if the number of query paths does not match the declared output.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> import pandas as pd
    >>> s = md.Series(
    ...     [
    ...         '{"age": 24, "gender": "male", "graduated": false}',
    ...         '{"age": 25, "gender": "female", "graduated": true}',
    ...     ]
    ... )
    >>> s.execute()
    0    {"age": 24, "gender": "male", "graduated": false}
    1    {"age": 25, "gender": "female", "graduated": true}
    dtype: object

    >>> df = s.mf.flatjson(
    ...     ["$.age", "$.gender", "$.graduated"],
    ...     dtypes=pd.Series(["int32", "object", "bool"], index=["age", "gender", "graduated"]),
    ... )
    >>> df.execute()
       age  gender  graduated
    0   24    male      False
    1   25  female       True

    >>> s2 = s.mf.flatjson("$.age", name="age", dtype="int32")
    >>> s2.execute()
    0    24
    1    25
    Name: age, dtype: int32
    """
    if isinstance(query_paths, str):
        # Accept a single path as a convenience shorthand.
        query_paths = [query_paths]
    if dtypes is not None and dtype is not None:
        raise ValueError("Both dtypes and dtype cannot be specified at the same time.")
    if dtype is not None:
        # A scalar dtype means a single-column extraction returning a Series.
        if len(query_paths) != 1:
            raise ValueError("query_paths should have only one path if dtype is set")
        output_type = OutputType.series
    elif dtypes is not None:
        # One dtype per query path is required for a DataFrame result.
        if len(dtypes) != len(query_paths):
            raise ValueError("query_paths and dtypes should have same length")
        output_type = OutputType.dataframe
    else:
        raise ValueError("dtypes or dtype should be specified")

    # For series output, pack (name, dtype) so the operator can unpack it.
    dtypes = (name, dtype) if dtype is not None else dtypes
    return SeriesFlatJSONOperator(query_paths=query_paths, _output_types=[output_type])(
        series, dtypes
    )
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Callable
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import OutputType
|
|
22
|
+
from ...serialization.serializables import (
|
|
23
|
+
BoolField,
|
|
24
|
+
DictField,
|
|
25
|
+
FunctionField,
|
|
26
|
+
TupleField,
|
|
27
|
+
)
|
|
28
|
+
from ..core import DataFrame
|
|
29
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
30
|
+
from ..utils import gen_unknown_index_value, make_dtypes, parse_index
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
    """Operator applying a per-row/per-element function whose results are
    flattened into the output.

    The applied function may produce zero or more rows per input row, so the
    output length is unknown (``np.nan``) until execution and a fresh index
    value is generated for the result.
    """

    _op_type_ = opcodes.FLATMAP

    func = FunctionField("func")
    raw = BoolField("raw", default=False)
    args = TupleField("args", default=())
    kwargs = DictField("kwargs", default={})

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _call_dataframe(self, df: DataFrame, dtypes: pd.Series):
        normalized = make_dtypes(dtypes)
        # Row count is unknown after flattening, hence a brand-new index value.
        new_index = gen_unknown_index_value(
            df.index_value,
            (df.key, df.index_value.key, self.func),
            normalize_range_index=True,
        )
        return self.new_dataframe(
            [df],
            shape=(np.nan, len(normalized)),
            index_value=new_index,
            columns_value=parse_index(normalized.index, store_data=True),
            dtypes=normalized,
        )

    def _call_series_or_index(self, series, dtypes=None):
        new_index = gen_unknown_index_value(
            series.index_value,
            (series.key, series.index_value.key, self.func),
            normalize_range_index=True,
        )

        if self.output_types[0] != OutputType.series:
            # DataFrame output: ``dtypes`` maps column names to dtypes.
            normalized = make_dtypes(dtypes)
            return self.new_dataframe(
                [series],
                shape=(np.nan, len(normalized)),
                index_value=new_index,
                columns_value=parse_index(normalized.index, store_data=True),
                dtypes=normalized,
            )

        # Series output: ``dtypes`` carries a ``(name, dtype)`` pair.
        out_name, out_dtype = dtypes
        return self.new_series(
            [series],
            dtype=out_dtype,
            shape=(np.nan,),
            index_value=new_index,
            name=out_name,
        )

    def __call__(
        self,
        df_or_series,
        dtypes=None,
        output_type=None,
    ):
        if df_or_series.op.output_types[0] == OutputType.dataframe:
            return self._call_dataframe(df_or_series, dtypes=dtypes)
        return self._call_series_or_index(df_or_series, dtypes=dtypes)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def df_flatmap(dataframe, func: Callable, dtypes=None, raw=False, args=(), **kwargs):
    """
    Apply the given function to each row and then flatten results. Use this method if your transformation returns
    multiple rows for each input row.

    This function applies a transformation to each row of the DataFrame, where the transformation can return zero
    or multiple values, effectively flattening Python generators, list-like collections, and DataFrames.

    Parameters
    ----------
    dataframe : DataFrame
        The DataFrame to which the function will be applied.

    func : Callable
        Function to apply to each row of the DataFrame. It should accept a Series (or an array if `raw=True`)
        representing a row and return a list or iterable of values.

    dtypes : Series, dict or list
        Specify dtypes of returned DataFrame.

    raw : bool, default False
        Determines if the row is passed as a Series or as a numpy array:

        * ``False`` : passes each row as a Series to the function.
        * ``True`` : the passed function will receive numpy array objects instead.

    args : tuple
        Positional arguments to pass to `func`.

    **kwargs
        Additional keyword arguments to pass as keywords arguments to `func`.

    Returns
    -------
    DataFrame
        Return DataFrame with specified `dtypes`.

    Raises
    ------
    TypeError
        If ``dtypes`` is missing/empty or ``func`` is not callable.

    Notes
    -----
    The `func` must return an iterable of values for each input row. The index of the resulting DataFrame will be
    repeated based on the number of output rows generated by `func`.

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    >>> df.execute()
       A  B
    0  1  4
    1  2  5
    2  3  6

    Define a function that takes a number and returns a list of two numbers:

    >>> def generate_values_array(row):
    ...     return [row['A'] * 2, row['B'] * 3]

    Define a function that takes a row and return two rows and two columns:

    >>> def generate_values_in_generator(row):
    ...     yield [row[0] * 2, row[1] * 4]
    ...     yield [row[0] * 3, row[1] * 5]

    Which equals to the following function return a dataframe:

    >>> def generate_values_in_dataframe(row):
    ...     return pd.DataFrame([[row[0] * 2, row[1] * 4], [row[0] * 3, row[1] * 5]])

    Specify `dtypes` with a function which returns a DataFrame:

    >>> df.mf.flatmap(generate_values_array, dtypes=pd.Series({'A': 'int'})).execute()
       A
    0  2
    0  12
    1  4
    1  15
    2  6
    2  18

    Specify raw=True to pass input row as array:

    >>> df.mf.flatmap(generate_values_in_generator, dtypes={"A": "int", "B": "int"}, raw=True).execute()
       A  B
    0  2  16
    0  3  20
    1  4  20
    1  6  25
    2  6  24
    2  9  30
    """
    if dtypes is None or len(dtypes) == 0:
        # The output schema cannot be inferred by trial execution; the caller
        # must declare it explicitly.
        raise TypeError(
            "Cannot determine dtypes by calculating with enumerate data, "
            "please specify it as arguments"
        )

    if not callable(func):
        raise TypeError("function must be a callable object")

    output_types = [OutputType.dataframe]
    op = DataFrameFlatMapOperator(
        func=func, raw=raw, output_types=output_types, args=args, kwargs=kwargs
    )
    return op(
        dataframe,
        dtypes=dtypes,
    )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def series_flatmap(
    series, func: Callable, dtypes=None, dtype=None, name=None, args=(), **kwargs
):
    """
    Apply the given function to each row and then flatten results. Use this method if your transformation returns
    multiple rows for each input row.

    This function applies a transformation to each element of the Series, where the transformation can return zero
    or multiple values, effectively flattening Python generator, list-liked collections and DataFrame.

    Parameters
    ----------
    series : Series
        The series to which the function will be applied.

    func : Callable
        Function to apply to each element of the Series. It should accept a scalar value
        (or an array if `raw=True`) and return a list or iterable of values.

    dtypes : Series, default None
        Specify dtypes of returned DataFrame. Can't work with dtype.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. Can't work with dtypes.

    name : str, default None
        Specify name of the returned Series.

    args : tuple
        Positional arguments to pass to `func`.

    **kwargs
        Additional keyword arguments to pass as keywords arguments to `func`.

    Returns
    -------
    DataFrame or Series
        Result of DataFrame when dtypes specified, else Series.

    Raises
    ------
    ValueError
        If both ``dtypes`` and ``dtype`` are specified.
    TypeError
        If neither ``dtypes`` nor ``dtype`` is specified, or ``func`` is not
        callable.

    Notes
    -----
    The `func` must return an iterable of values for each input element. If `dtypes` is specified,
    `flatmap` will return a DataFrame, if `dtype` and `name` is specified, a Series will be returned. The index of
    the resulting DataFrame/Series will be repeated based on the number of output rows generated by `func`.

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    >>> df.execute()
       A  B
    0  1  4
    1  2  5
    2  3  6

    Define a function that takes a number and returns a list of two numbers:

    >>> def generate_values_array(x):
    ...     return [x * 2, x * 3]

    >>> def generate_values_in_generator(x):
    ...     yield pd.Series([x * 2, x * 4])
    ...     yield pd.Series([x * 3, x * 5])

    Specify `dtype` with a function which returns list to return more than one elements as a Series:

    >>> df['A'].mf.flatmap(generate_values_array, dtype="int", name="C").execute()
    0    2
    0    3
    1    4
    1    6
    2    6
    2    9
    Name: C, dtype: int64

    Specify `dtypes` to return multi columns as a DataFrame:

    >>> df['A'].mf.flatmap(generate_values_in_generator, dtypes={"A": "int", "B": "int"}).execute()
       A  B
    0  2  4
    0  3  5
    1  4  8
    1  6  10
    2  6  12
    2  9  15
    """

    if dtypes is not None and dtype is not None:
        raise ValueError("Both dtypes and dtype cannot be specified at the same time.")

    # For series output, pack (name, dtype) so the operator can unpack it.
    dtypes = (name, dtype) if dtype is not None else dtypes
    if dtypes is None:
        # The output schema cannot be inferred by trial execution; the caller
        # must declare it explicitly.
        raise TypeError(
            "Cannot determine dtypes or dtype by calculating with enumerate data, "
            "please specify it as arguments"
        )

    if not callable(func):
        raise TypeError("function must be a callable object")

    output_type = OutputType.series if dtype is not None else OutputType.dataframe

    op = DataFrameFlatMapOperator(
        func=func, raw=False, output_types=[output_type], args=args, kwargs=kwargs
    )
    return op(
        series,
        dtypes=dtypes,
    )
|
|
@@ -38,7 +38,7 @@ class DataFrameReshuffle(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
38
38
|
else:
|
|
39
39
|
idx_value = df.index_value
|
|
40
40
|
if isinstance(idx_value.value, IndexValue.RangeIndex):
|
|
41
|
-
idx_value = parse_index(pd.
|
|
41
|
+
idx_value = parse_index(pd.RangeIndex(1))
|
|
42
42
|
params = df.params
|
|
43
43
|
params["index_value"] = idx_value
|
|
44
44
|
self._output_types = get_output_types(df)
|
|
@@ -11,12 +11,14 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import numpy as np
|
|
15
15
|
import pandas as pd
|
|
16
16
|
import pytest
|
|
17
17
|
|
|
18
18
|
from .... import dataframe as md
|
|
19
|
-
from
|
|
19
|
+
from ....tests.utils import assert_mf_index_dtype
|
|
20
|
+
from ... import DataFrame
|
|
21
|
+
from ...core import DATAFRAME_TYPE, SERIES_TYPE, IndexValue
|
|
20
22
|
from ..reshuffle import DataFrameReshuffle
|
|
21
23
|
|
|
22
24
|
|
|
@@ -31,8 +33,111 @@ def test_reshuffle():
|
|
|
31
33
|
|
|
32
34
|
r = mdf.mf.reshuffle()
|
|
33
35
|
assert isinstance(r.op, DataFrameReshuffle)
|
|
34
|
-
|
|
36
|
+
assert_mf_index_dtype(r.index_value.value, np.int64)
|
|
35
37
|
|
|
36
38
|
r = mdf.mf.reshuffle(ignore_index=True)
|
|
37
39
|
assert isinstance(r.op, DataFrameReshuffle)
|
|
38
40
|
assert isinstance(r.index_value.value, IndexValue.RangeIndex)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.fixture
def df1():
    # Three identical integer columns over a default RangeIndex.
    data = {col: [1, 2, 3] for col in ("a", "b", "c")}
    return DataFrame(data)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@pytest.fixture
def df2():
    # Same data as ``df1`` but constructed from row lists.
    rows = [[1, 2, 3], [1, 2, 3], [1, 2, 3]]
    return DataFrame(rows, columns=["a", "b", "c"])
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.fixture
def df3():
    # Row-list frame carrying a two-level MultiIndex.
    multi_index = pd.MultiIndex.from_arrays([[1, 2, 3], [1, 2, 3]], names=["A", "B"])
    return DataFrame(
        [[1, 2, 3], [1, 2, 3], [1, 2, 3]],
        columns=["a", "b", "c"],
        index=multi_index,
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_flatmap(df1, df2, df3):
    """Check flatmap metadata (shape, index keys) for frames and series."""

    def f(x, keys):
        if x["a"] in keys:
            yield [1, 0]
            yield [0, 1]

    # DataFrame input with a dict of dtypes.
    result = df1[["a"]].mf.flatmap(f, dtypes={"a": "int64", "b": "int64"})
    assert result.shape == (np.nan, 2)
    assert df1.index_value.key != result.index_value.key
    assert isinstance(df1.index_value.to_pandas(), pd.RangeIndex)
    assert not isinstance(result.index_value.to_pandas(), pd.RangeIndex)

    # DataFrame input with a pandas Series of dtypes.
    result = df2[["a"]].mf.flatmap(f, dtypes=pd.Series(["int64", "int64"]))
    assert result.shape == (np.nan, 2)
    assert df2.index_value.key != result.index_value.key

    # Missing dtypes/dtype must be rejected.
    with pytest.raises(TypeError):
        series_result = df3["a"].mf.flatmap(f)

    # Series input with a scalar dtype produces a series.
    series_result = df3["a"].mf.flatmap(f, dtype="int64")
    assert series_result.shape == (np.nan,)
    assert df3.index_value.key != series_result.index_value.key
    assert df3.key != series_result.index_value.key

    # Series input with dtypes produces a dataframe.
    # NOTE(review): ``output_type`` is not a declared flatmap parameter and
    # ends up in the func kwargs — confirm this is intended.
    series_result = df3["a"].mf.flatmap(
        f, output_type="dataframe", dtypes=["int64", "int64"]
    )
    assert series_result.shape == (np.nan, 2)
    assert df3.index_value.key != series_result.index_value.key
    assert df3.key != series_result.index_value.key
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_flatjson():
|
|
104
|
+
s1 = md.Series(["{{'a': 1, 'b': false}}"], index=[1])
|
|
105
|
+
df1 = s1.mf.flatjson(
|
|
106
|
+
["$.a", "$.b"], dtypes=pd.Series(["int32", "bool"], index=["a", "b"])
|
|
107
|
+
)
|
|
108
|
+
assert df1.shape == (1, 2)
|
|
109
|
+
assert df1.index_value.key == s1.index_value.key
|
|
110
|
+
assert isinstance(df1, DATAFRAME_TYPE)
|
|
111
|
+
assert list(df1.dtypes) == [np.dtype("int32"), np.dtype("bool")]
|
|
112
|
+
assert list(df1.dtypes.index) == ["a", "b"]
|
|
113
|
+
|
|
114
|
+
df2 = s1.mf.flatjson(["$.a"], dtypes=pd.Series(["int32"], index=["a"]))
|
|
115
|
+
assert df2.shape == (1, 1)
|
|
116
|
+
assert df2.index_value.key == s1.index_value.key
|
|
117
|
+
assert isinstance(df2, DATAFRAME_TYPE)
|
|
118
|
+
assert list(df2.dtypes) == [np.dtype("int32")]
|
|
119
|
+
assert list(df2.dtypes.index) == ["a"]
|
|
120
|
+
|
|
121
|
+
s2 = s1.mf.flatjson("$.a", dtype="int32", name="a")
|
|
122
|
+
assert s2.shape == (1,)
|
|
123
|
+
assert s2.index_value.key == s1.index_value.key
|
|
124
|
+
assert isinstance(s2, SERIES_TYPE)
|
|
125
|
+
assert s2.dtype == np.dtype("int32")
|
|
126
|
+
assert s2.name == "a"
|
|
127
|
+
|
|
128
|
+
with pytest.raises(ValueError):
|
|
129
|
+
s1.mf.flatjson([], dtypes=pd.Series(["int32", "bool"], index=["a", "b"]))
|
|
130
|
+
with pytest.raises(ValueError):
|
|
131
|
+
s1.mf.flatjson(["$.a"], dtypes=pd.Series(["int32", "bool"], index=["a", "b"]))
|
|
132
|
+
with pytest.raises(ValueError):
|
|
133
|
+
s1.mf.flatjson(["$.a"], dtypes=pd.Series(["int32", "bool"], index=["a", "b"]))
|
|
134
|
+
with pytest.raises(ValueError):
|
|
135
|
+
s1.mf.flatjson(["$.a", "$.b"], dtypes=pd.Series(["bool"], index=["b"]))
|
|
136
|
+
with pytest.raises(ValueError):
|
|
137
|
+
s1.mf.flatjson(
|
|
138
|
+
["$.a"],
|
|
139
|
+
dtype="int32",
|
|
140
|
+
dtypes=pd.Series(["int32"], index=["a"]),
|
|
141
|
+
)
|
|
142
|
+
with pytest.raises(ValueError):
|
|
143
|
+
s1.mf.flatjson(["$.a"])
|
|
@@ -28,7 +28,7 @@ from ..utils import build_df, build_series, parse_index
|
|
|
28
28
|
|
|
29
29
|
cudf = lazy_import("cudf")
|
|
30
30
|
|
|
31
|
-
_GROUP_KEYS_NO_DEFAULT = pd_release_version
|
|
31
|
+
_GROUP_KEYS_NO_DEFAULT = pd_release_version[:2] == (1, 5)
|
|
32
32
|
_default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
|
|
33
33
|
|
|
34
34
|
|
|
@@ -59,7 +59,6 @@ class GroupByCumReductionOperator(DataFrameOperatorMixin, DataFrameOperator):
|
|
|
59
59
|
out_dtypes = self._calc_out_dtypes(groupby)
|
|
60
60
|
|
|
61
61
|
kw = in_df.params.copy()
|
|
62
|
-
kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
|
|
63
62
|
if self.output_types[0] == OutputType.dataframe:
|
|
64
63
|
kw.update(
|
|
65
64
|
dict(
|
|
@@ -35,12 +35,15 @@ class GroupByFillOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
35
35
|
func_name = getattr(self, "_func_name")
|
|
36
36
|
|
|
37
37
|
if func_name == "fillna":
|
|
38
|
+
kw = {}
|
|
39
|
+
if self.axis is not None:
|
|
40
|
+
kw["axis"] = self.axis
|
|
38
41
|
result_df = mock_groupby.fillna(
|
|
39
42
|
value=self.value,
|
|
40
43
|
method=self.method,
|
|
41
|
-
axis=self.axis,
|
|
42
44
|
limit=self.limit,
|
|
43
45
|
downcast=self.downcast,
|
|
46
|
+
**kw,
|
|
44
47
|
)
|
|
45
48
|
else:
|
|
46
49
|
result_df = getattr(mock_groupby, func_name)(limit=self.limit)
|
|
@@ -88,5 +88,11 @@ def df_groupby_getitem(df_groupby, item):
|
|
|
88
88
|
if df_groupby.selection:
|
|
89
89
|
raise IndexError(f"Column(s) {df_groupby.selection!r} already selected")
|
|
90
90
|
|
|
91
|
+
if (
|
|
92
|
+
isinstance(item, tuple)
|
|
93
|
+
and item not in df_groupby.dtypes
|
|
94
|
+
and item not in df_groupby.index.names
|
|
95
|
+
):
|
|
96
|
+
item = list(item)
|
|
91
97
|
op = GroupByIndex(selection=item, output_types=output_types)
|
|
92
98
|
return op(df_groupby)
|
|
@@ -230,7 +230,7 @@ def test_groupby_transform():
|
|
|
230
230
|
assert r.op._op_type_ == opcodes.TRANSFORM
|
|
231
231
|
assert r.op.output_types[0] == OutputType.dataframe
|
|
232
232
|
|
|
233
|
-
r = mdf.groupby("b").transform(["cummax", "cumcount"], _call_agg=True)
|
|
233
|
+
r = mdf[list("abde")].groupby("b").transform(["cummax", "cumcount"], _call_agg=True)
|
|
234
234
|
assert r.shape == (np.nan, 6)
|
|
235
235
|
assert r.op._op_type_ == opcodes.TRANSFORM
|
|
236
236
|
assert r.op.output_types[0] == OutputType.dataframe
|
|
@@ -282,14 +282,17 @@ def test_groupby_cum():
|
|
|
282
282
|
r = getattr(mdf.groupby("b"), fun)()
|
|
283
283
|
assert r.op.output_types[0] == OutputType.dataframe
|
|
284
284
|
assert r.shape == (len(df1), 2)
|
|
285
|
+
assert r.index_value.key == mdf.index_value.key
|
|
285
286
|
|
|
286
287
|
r = getattr(mdf.groupby("b"), fun)(axis=1)
|
|
287
288
|
assert r.op.output_types[0] == OutputType.dataframe
|
|
288
289
|
assert r.shape == (len(df1), 3)
|
|
290
|
+
assert r.index_value.key == mdf.index_value.key
|
|
289
291
|
|
|
290
292
|
r = mdf.groupby("b").cumcount()
|
|
291
293
|
assert r.op.output_types[0] == OutputType.series
|
|
292
294
|
assert r.shape == (len(df1),)
|
|
295
|
+
assert r.index_value.key == mdf.index_value.key
|
|
293
296
|
|
|
294
297
|
series1 = pd.Series([2, 2, 5, 7, 3, 7, 8, 8, 5, 6])
|
|
295
298
|
ms1 = md.Series(series1, chunk_size=3)
|
|
@@ -298,6 +301,7 @@ def test_groupby_cum():
|
|
|
298
301
|
r = getattr(ms1.groupby(lambda x: x % 2), fun)()
|
|
299
302
|
assert r.op.output_types[0] == OutputType.series
|
|
300
303
|
assert r.shape == (len(series1),)
|
|
304
|
+
assert r.index_value.key == ms1.index_value.key
|
|
301
305
|
|
|
302
306
|
|
|
303
307
|
def test_groupby_fill():
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import logging
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
|
|
@@ -22,6 +24,8 @@ from ...utils import quiet_stdio
|
|
|
22
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
23
25
|
from ..utils import parse_index
|
|
24
26
|
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
25
29
|
|
|
26
30
|
class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
|
|
27
31
|
_op_type_ = opcodes.TRANSFORM
|
|
@@ -65,7 +69,7 @@ class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
65
69
|
output_types = [OutputType.series]
|
|
66
70
|
new_dtypes = new_dtypes or (infer_df.name, infer_df.dtype)
|
|
67
71
|
except: # noqa: E722 # nosec
|
|
68
|
-
|
|
72
|
+
logger.info("Exception raised while inferring df_func", exc_info=True)
|
|
69
73
|
|
|
70
74
|
self.output_types = output_types if not self.output_types else self.output_types
|
|
71
75
|
dtypes = new_dtypes if dtypes is None else dtypes
|