maxframe 0.1.0b5__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp39-win32.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp39-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp39-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp39-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@ from ..utils import validate_axis
|
|
|
18
18
|
def pct_change(
|
|
19
19
|
df_or_series, periods=1, fill_method="pad", limit=None, freq=None, **kwargs
|
|
20
20
|
):
|
|
21
|
+
# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/32
|
|
21
22
|
"""
|
|
22
23
|
Percentage change between the current and a prior element.
|
|
23
24
|
|
|
@@ -50,89 +51,6 @@ def pct_change(
|
|
|
50
51
|
DataFrame.diff : Compute the difference of two elements in a DataFrame.
|
|
51
52
|
Series.shift : Shift the index by some number of periods.
|
|
52
53
|
DataFrame.shift : Shift the index by some number of periods.
|
|
53
|
-
|
|
54
|
-
Examples
|
|
55
|
-
--------
|
|
56
|
-
**Series**
|
|
57
|
-
|
|
58
|
-
>>> import maxframe.dataframe as md
|
|
59
|
-
|
|
60
|
-
>>> s = md.Series([90, 91, 85])
|
|
61
|
-
>>> s.execute()
|
|
62
|
-
0 90
|
|
63
|
-
1 91
|
|
64
|
-
2 85
|
|
65
|
-
dtype: int64
|
|
66
|
-
|
|
67
|
-
>>> s.pct_change().execute()
|
|
68
|
-
0 NaN
|
|
69
|
-
1 0.011111
|
|
70
|
-
2 -0.065934
|
|
71
|
-
dtype: float64
|
|
72
|
-
|
|
73
|
-
>>> s.pct_change(periods=2).execute()
|
|
74
|
-
0 NaN
|
|
75
|
-
1 NaN
|
|
76
|
-
2 -0.055556
|
|
77
|
-
dtype: float64
|
|
78
|
-
|
|
79
|
-
See the percentage change in a Series where filling NAs with last
|
|
80
|
-
valid observation forward to next valid.
|
|
81
|
-
|
|
82
|
-
>>> s = md.Series([90, 91, None, 85])
|
|
83
|
-
>>> s.execute()
|
|
84
|
-
0 90.0
|
|
85
|
-
1 91.0
|
|
86
|
-
2 NaN
|
|
87
|
-
3 85.0
|
|
88
|
-
dtype: float64
|
|
89
|
-
|
|
90
|
-
>>> s.pct_change(fill_method='ffill').execute()
|
|
91
|
-
0 NaN
|
|
92
|
-
1 0.011111
|
|
93
|
-
2 0.000000
|
|
94
|
-
3 -0.065934
|
|
95
|
-
dtype: float64
|
|
96
|
-
|
|
97
|
-
**DataFrame**
|
|
98
|
-
|
|
99
|
-
Percentage change in French franc, Deutsche Mark, and Italian lira from
|
|
100
|
-
1980-01-01 to 1980-03-01.
|
|
101
|
-
|
|
102
|
-
>>> df = md.DataFrame({
|
|
103
|
-
... 'FR': [4.0405, 4.0963, 4.3149],
|
|
104
|
-
... 'GR': [1.7246, 1.7482, 1.8519],
|
|
105
|
-
... 'IT': [804.74, 810.01, 860.13]},
|
|
106
|
-
... index=['1980-01-01', '1980-02-01', '1980-03-01'])
|
|
107
|
-
>>> df.execute()
|
|
108
|
-
FR GR IT
|
|
109
|
-
1980-01-01 4.0405 1.7246 804.74
|
|
110
|
-
1980-02-01 4.0963 1.7482 810.01
|
|
111
|
-
1980-03-01 4.3149 1.8519 860.13
|
|
112
|
-
|
|
113
|
-
>>> df.pct_change().execute()
|
|
114
|
-
FR GR IT
|
|
115
|
-
1980-01-01 NaN NaN NaN
|
|
116
|
-
1980-02-01 0.013810 0.013684 0.006549
|
|
117
|
-
1980-03-01 0.053365 0.059318 0.061876
|
|
118
|
-
|
|
119
|
-
Percentage of change in GOOG and APPL stock volume. Shows computing
|
|
120
|
-
the percentage change between columns.
|
|
121
|
-
|
|
122
|
-
>>> df = md.DataFrame({
|
|
123
|
-
... '2016': [1769950, 30586265],
|
|
124
|
-
... '2015': [1500923, 40912316],
|
|
125
|
-
... '2014': [1371819, 41403351]},
|
|
126
|
-
... index=['GOOG', 'APPL'])
|
|
127
|
-
>>> df.execute()
|
|
128
|
-
2016 2015 2014
|
|
129
|
-
GOOG 1769950 1500923 1371819
|
|
130
|
-
APPL 30586265 40912316 41403351
|
|
131
|
-
|
|
132
|
-
>>> df.pct_change(axis='columns').execute()
|
|
133
|
-
2016 2015 2014
|
|
134
|
-
GOOG NaN -0.151997 -0.086016
|
|
135
|
-
APPL NaN 0.337604 0.012002
|
|
136
54
|
"""
|
|
137
55
|
|
|
138
56
|
axis = validate_axis(kwargs.pop("axis", 0))
|
|
@@ -18,6 +18,7 @@ import pytest
|
|
|
18
18
|
|
|
19
19
|
from .... import opcodes
|
|
20
20
|
from ....core import OutputType
|
|
21
|
+
from ....dataframe import DataFrame
|
|
21
22
|
from ....tensor.core import TENSOR_TYPE
|
|
22
23
|
from ... import eval as maxframe_eval
|
|
23
24
|
from ... import get_dummies, to_numeric
|
|
@@ -348,7 +349,9 @@ def test_drop():
|
|
|
348
349
|
def test_drop_duplicates():
|
|
349
350
|
rs = np.random.RandomState(0)
|
|
350
351
|
raw = pd.DataFrame(
|
|
351
|
-
rs.randint(1000, size=(20, 7)),
|
|
352
|
+
rs.randint(1000, size=(20, 7)),
|
|
353
|
+
columns=["c" + str(i + 1) for i in range(7)],
|
|
354
|
+
index=pd.Index(range(20), name="idx"),
|
|
352
355
|
)
|
|
353
356
|
raw["c7"] = [f"s{j}" for j in range(20)]
|
|
354
357
|
|
|
@@ -360,6 +363,12 @@ def test_drop_duplicates():
|
|
|
360
363
|
with pytest.raises(KeyError):
|
|
361
364
|
df.drop_duplicates(subset="c8")
|
|
362
365
|
|
|
366
|
+
# check index
|
|
367
|
+
distinct_df = df.drop_duplicates()
|
|
368
|
+
assert distinct_df.index_value.name == df.index_value.name
|
|
369
|
+
assert isinstance(df.index_value.to_pandas(), pd.RangeIndex)
|
|
370
|
+
assert not isinstance(distinct_df.index_value.to_pandas(), pd.RangeIndex)
|
|
371
|
+
|
|
363
372
|
s = df["c7"]
|
|
364
373
|
with pytest.raises(ValueError):
|
|
365
374
|
s.drop_duplicates(method="unknown")
|
|
@@ -430,6 +439,28 @@ def test_case_when():
|
|
|
430
439
|
assert isinstance(col.inputs[2].op, DataFrameGreater)
|
|
431
440
|
|
|
432
441
|
|
|
442
|
+
def test_apply():
|
|
443
|
+
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
|
|
444
|
+
|
|
445
|
+
keys = [1, 2]
|
|
446
|
+
|
|
447
|
+
def f(x, keys):
|
|
448
|
+
if x["a"] in keys:
|
|
449
|
+
return [1, 0]
|
|
450
|
+
else:
|
|
451
|
+
return [0, 1]
|
|
452
|
+
|
|
453
|
+
apply_df = df[["a"]].apply(
|
|
454
|
+
f,
|
|
455
|
+
output_type="dataframe",
|
|
456
|
+
dtypes=pd.Series(["int64", "int64"]),
|
|
457
|
+
axis=1,
|
|
458
|
+
result_type="expand",
|
|
459
|
+
keys=keys,
|
|
460
|
+
)
|
|
461
|
+
assert apply_df.shape == (3, 2)
|
|
462
|
+
|
|
463
|
+
|
|
433
464
|
def test_pivot_table():
|
|
434
465
|
from ...groupby.aggregation import DataFrameGroupByAgg
|
|
435
466
|
from ...misc.pivot_table import DataFramePivotTable
|
|
@@ -451,7 +482,7 @@ def test_pivot_table():
|
|
|
451
482
|
with pytest.raises(ValueError):
|
|
452
483
|
df.pivot_table(values=["D", "E"], aggfunc="sum")
|
|
453
484
|
|
|
454
|
-
t = df.pivot_table(index="A")
|
|
485
|
+
t = df.pivot_table(index=["A", "B", "C"])
|
|
455
486
|
assert isinstance(t.op, DataFrameGroupByAgg)
|
|
456
487
|
t = df.pivot_table(index="A", values=["D", "E"], aggfunc="sum")
|
|
457
488
|
assert isinstance(t.op, DataFrameGroupByAgg)
|
|
@@ -228,21 +228,6 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
|
|
|
228
228
|
0 1 2
|
|
229
229
|
1 2 3
|
|
230
230
|
2 3 4
|
|
231
|
-
|
|
232
|
-
Even though the resulting DataFrame must have the same length as the
|
|
233
|
-
input DataFrame, it is possible to provide several input functions:
|
|
234
|
-
|
|
235
|
-
>>> s = md.Series(range(3))
|
|
236
|
-
>>> s.execute()
|
|
237
|
-
0 0
|
|
238
|
-
1 1
|
|
239
|
-
2 2
|
|
240
|
-
dtype: int64
|
|
241
|
-
>>> s.transform([mt.sqrt, mt.exp]).execute()
|
|
242
|
-
sqrt exp
|
|
243
|
-
0 0.000000 1.000000
|
|
244
|
-
1 1.000000 2.718282
|
|
245
|
-
2 1.414214 7.389056
|
|
246
231
|
"""
|
|
247
232
|
op = TransformOperator(
|
|
248
233
|
func=func,
|
|
@@ -265,6 +250,7 @@ def series_transform(
|
|
|
265
250
|
dtype=None,
|
|
266
251
|
**kwargs
|
|
267
252
|
):
|
|
253
|
+
# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/10
|
|
268
254
|
"""
|
|
269
255
|
Call ``func`` on self producing a Series with transformed values.
|
|
270
256
|
|
|
@@ -332,21 +318,6 @@ def series_transform(
|
|
|
332
318
|
0 1 2
|
|
333
319
|
1 2 3
|
|
334
320
|
2 3 4
|
|
335
|
-
|
|
336
|
-
Even though the resulting Series must have the same length as the
|
|
337
|
-
input Series, it is possible to provide several input functions:
|
|
338
|
-
|
|
339
|
-
>>> s = md.Series(range(3))
|
|
340
|
-
>>> s.execute()
|
|
341
|
-
0 0
|
|
342
|
-
1 1
|
|
343
|
-
2 2
|
|
344
|
-
dtype: int64
|
|
345
|
-
>>> s.transform([mt.sqrt, mt.exp]).execute()
|
|
346
|
-
sqrt exp
|
|
347
|
-
0 0.000000 1.000000
|
|
348
|
-
1 1.000000 2.718282
|
|
349
|
-
2 1.414214 7.389056
|
|
350
321
|
"""
|
|
351
322
|
op = TransformOperator(
|
|
352
323
|
func=func,
|
|
@@ -85,6 +85,7 @@ def value_counts(
|
|
|
85
85
|
dropna=True,
|
|
86
86
|
method="auto",
|
|
87
87
|
):
|
|
88
|
+
# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/33
|
|
88
89
|
"""
|
|
89
90
|
Return a Series containing counts of unique values.
|
|
90
91
|
|
|
@@ -125,9 +126,8 @@ def value_counts(
|
|
|
125
126
|
Examples
|
|
126
127
|
--------
|
|
127
128
|
>>> import maxframe.dataframe as md
|
|
128
|
-
>>> import maxframe.tensor as mt
|
|
129
|
-
|
|
130
|
-
>>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
|
|
129
|
+
>>> import numpy as np
|
|
130
|
+
>>> s = md.Series([3, 1, 2, 3, 4, np.nan])
|
|
131
131
|
>>> s.value_counts().execute()
|
|
132
132
|
3.0 2
|
|
133
133
|
4.0 1
|
|
@@ -138,7 +138,7 @@ def value_counts(
|
|
|
138
138
|
With `normalize` set to `True`, returns the relative frequency by
|
|
139
139
|
dividing all values by the sum of values.
|
|
140
140
|
|
|
141
|
-
>>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
|
|
141
|
+
>>> s = md.Series([3, 1, 2, 3, 4, np.nan])
|
|
142
142
|
>>> s.value_counts(normalize=True).execute()
|
|
143
143
|
3.0 0.4
|
|
144
144
|
4.0 0.2
|
|
@@ -146,19 +146,6 @@ def value_counts(
|
|
|
146
146
|
1.0 0.2
|
|
147
147
|
dtype: float64
|
|
148
148
|
|
|
149
|
-
**bins**
|
|
150
|
-
|
|
151
|
-
Bins can be useful for going from a continuous variable to a
|
|
152
|
-
categorical variable; instead of counting unique
|
|
153
|
-
apparitions of values, divide the index in the specified
|
|
154
|
-
number of half-open bins.
|
|
155
|
-
|
|
156
|
-
>>> s.value_counts(bins=3).execute()
|
|
157
|
-
(2.0, 3.0] 2
|
|
158
|
-
(0.996, 2.0] 2
|
|
159
|
-
(3.0, 4.0] 1
|
|
160
|
-
dtype: int64
|
|
161
|
-
|
|
162
149
|
**dropna**
|
|
163
150
|
|
|
164
151
|
With `dropna` set to `False` we can also see NaN index values.
|
|
@@ -234,7 +234,7 @@ def series_dropna(series, axis=0, inplace=False, how=None):
|
|
|
234
234
|
Empty strings are not considered NA values. ``None`` is considered an
|
|
235
235
|
NA value.
|
|
236
236
|
|
|
237
|
-
>>> ser = md.Series([np.NaN, 2, md.NaT, '', None, 'I stay'])
|
|
237
|
+
>>> ser = md.Series([np.NaN, '2', md.NaT, '', None, 'I stay'])
|
|
238
238
|
>>> ser.execute()
|
|
239
239
|
0 NaN
|
|
240
240
|
1 2
|
|
@@ -132,11 +132,11 @@ def fillna(
|
|
|
132
132
|
--------
|
|
133
133
|
>>> import maxframe.tensor as mt
|
|
134
134
|
>>> import maxframe.dataframe as md
|
|
135
|
-
>>> df = md.DataFrame([[
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
>>> df = md.DataFrame([[np.nan, 2, np.nan, 0],
|
|
136
|
+
[3, 4, np.nan, 1],
|
|
137
|
+
[np.nan, np.nan, np.nan, 5],
|
|
138
|
+
[np.nan, 3, np.nan, 4]],
|
|
139
|
+
columns=list('ABCD'))
|
|
140
140
|
>>> df.execute()
|
|
141
141
|
A B C D
|
|
142
142
|
0 NaN 2.0 NaN 0
|
maxframe/dataframe/operators.py
CHANGED
|
@@ -16,13 +16,7 @@ import numpy as np
|
|
|
16
16
|
import pandas as pd
|
|
17
17
|
|
|
18
18
|
from ..core import ENTITY_TYPE, OutputType
|
|
19
|
-
from ..core.operator import (
|
|
20
|
-
Fuse,
|
|
21
|
-
FuseChunkMixin,
|
|
22
|
-
Operator,
|
|
23
|
-
ShuffleProxy,
|
|
24
|
-
TileableOperatorMixin,
|
|
25
|
-
)
|
|
19
|
+
from ..core.operator import Operator, ShuffleProxy, TileableOperatorMixin
|
|
26
20
|
from ..tensor.core import TENSOR_TYPE
|
|
27
21
|
from ..tensor.datasource import tensor as astensor
|
|
28
22
|
from .core import DATAFRAME_TYPE, SERIES_TYPE
|
|
@@ -261,13 +255,3 @@ DataFrameOperator = Operator
|
|
|
261
255
|
class DataFrameShuffleProxy(ShuffleProxy, DataFrameOperatorMixin):
|
|
262
256
|
def __init__(self, sparse=None, output_types=None, **kwargs):
|
|
263
257
|
super().__init__(sparse=sparse, _output_types=output_types, **kwargs)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
class DataFrameFuseChunkMixin(FuseChunkMixin, DataFrameOperatorMixin):
|
|
267
|
-
__slots__ = ()
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
class DataFrameFuseChunk(Fuse, DataFrameFuseChunkMixin):
|
|
271
|
-
@property
|
|
272
|
-
def output_types(self):
|
|
273
|
-
return self.outputs[-1].chunk.op.output_types
|
|
@@ -552,7 +552,7 @@ class ReductionCompiler:
|
|
|
552
552
|
@enter_mode(build=True)
|
|
553
553
|
def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
|
|
554
554
|
from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
|
|
555
|
-
from ...tensor.base import TensorWhere
|
|
555
|
+
from ...tensor.misc import TensorWhere
|
|
556
556
|
from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
|
|
557
557
|
from ..datasource.dataframe import DataFrameDataSource
|
|
558
558
|
from ..datasource.series import SeriesDataSource
|
|
@@ -679,8 +679,8 @@ class ReductionCompiler:
|
|
|
679
679
|
]
|
|
680
680
|
"""
|
|
681
681
|
from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
|
|
682
|
-
from ...tensor.base import TensorWhere
|
|
683
682
|
from ...tensor.datasource import Scalar
|
|
683
|
+
from ...tensor.misc import TensorWhere
|
|
684
684
|
from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
|
|
685
685
|
from ..datasource.dataframe import DataFrameDataSource
|
|
686
686
|
from ..datasource.series import SeriesDataSource
|
|
@@ -23,6 +23,7 @@ import pytest
|
|
|
23
23
|
|
|
24
24
|
from .... import dataframe as md
|
|
25
25
|
from ....tensor import Tensor
|
|
26
|
+
from ....tests.utils import assert_mf_index_dtype
|
|
26
27
|
from ...core import DataFrame, IndexValue, OutputType, Series
|
|
27
28
|
from ...datasource.dataframe import from_pandas as from_pandas_df
|
|
28
29
|
from ...datasource.series import from_pandas as from_pandas_series
|
|
@@ -111,10 +112,7 @@ def test_dataframe_reduction(func_name, op, func_opts: FunctionOptions):
|
|
|
111
112
|
reduction_df = getattr(from_pandas_df(data, chunk_size=3), func_name)()
|
|
112
113
|
|
|
113
114
|
assert isinstance(reduction_df, Series)
|
|
114
|
-
|
|
115
|
-
reduction_df.index_value._index_value,
|
|
116
|
-
(IndexValue.RangeIndex, IndexValue.Int64Index),
|
|
117
|
-
)
|
|
115
|
+
assert_mf_index_dtype(reduction_df.index_value._index_value, np.int64)
|
|
118
116
|
assert reduction_df.shape == (10,)
|
|
119
117
|
|
|
120
118
|
data = pd.DataFrame(np.random.rand(20, 20), index=[str(i) for i in range(20)])
|
|
@@ -67,6 +67,7 @@ def dataframe_sort_values(
|
|
|
67
67
|
parallel_kind="PSRS",
|
|
68
68
|
psrs_kinds=None,
|
|
69
69
|
):
|
|
70
|
+
# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/15
|
|
70
71
|
"""
|
|
71
72
|
Sort by the values along either axis.
|
|
72
73
|
|
|
@@ -152,17 +153,6 @@ def dataframe_sort_values(
|
|
|
152
153
|
0 A 2 0
|
|
153
154
|
1 A 1 1
|
|
154
155
|
3 NaN 8 4
|
|
155
|
-
|
|
156
|
-
Putting NAs first
|
|
157
|
-
|
|
158
|
-
>>> df.sort_values(by='col1', ascending=False, na_position='first').execute()
|
|
159
|
-
col1 col2 col3
|
|
160
|
-
3 NaN 8 4
|
|
161
|
-
4 D 7 2
|
|
162
|
-
5 C 4 3
|
|
163
|
-
2 B 9 9
|
|
164
|
-
0 A 2 0
|
|
165
|
-
1 A 1 1
|
|
166
156
|
"""
|
|
167
157
|
|
|
168
158
|
if na_position not in ["last", "first"]: # pragma: no cover
|
|
@@ -43,7 +43,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
43
43
|
def __call__(self, df_or_series):
|
|
44
44
|
if isinstance(df_or_series, SERIES_TYPE):
|
|
45
45
|
inputs = filter_inputs([df_or_series, self.other])
|
|
46
|
-
return self.new_scalar(inputs, dtype=np.dtype(
|
|
46
|
+
return self.new_scalar(inputs, dtype=np.dtype(float))
|
|
47
47
|
else:
|
|
48
48
|
|
|
49
49
|
def _filter_numeric(obj):
|
|
@@ -60,7 +60,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
60
60
|
inputs = filter_inputs([df_or_series, self.other])
|
|
61
61
|
if self.axis is None:
|
|
62
62
|
dtypes = pd.Series(
|
|
63
|
-
[np.dtype(
|
|
63
|
+
[np.dtype(float)] * len(df_or_series.dtypes),
|
|
64
64
|
index=df_or_series.dtypes.index,
|
|
65
65
|
)
|
|
66
66
|
return self.new_dataframe(
|
|
@@ -85,7 +85,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
85
85
|
return self.new_series(
|
|
86
86
|
inputs,
|
|
87
87
|
shape=shape,
|
|
88
|
-
dtype=np.dtype(
|
|
88
|
+
dtype=np.dtype(float),
|
|
89
89
|
index_value=new_index_value,
|
|
90
90
|
)
|
|
91
91
|
|
|
@@ -14,8 +14,9 @@
|
|
|
14
14
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import pandas as pd
|
|
17
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
17
18
|
|
|
18
|
-
from ... import opcodes
|
|
19
|
+
from ... import opcodes
|
|
19
20
|
from ...core import ENTITY_TYPE
|
|
20
21
|
from ...serialization.serializables import (
|
|
21
22
|
AnyField,
|
|
@@ -32,11 +33,11 @@ from ...tensor.datasource import tensor as astensor
|
|
|
32
33
|
from ...tensor.statistics.quantile import quantile as tensor_quantile
|
|
33
34
|
from ..core import DATAFRAME_TYPE
|
|
34
35
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
35
|
-
from ..utils import build_empty_df,
|
|
36
|
+
from ..utils import build_empty_df, parse_index, validate_axis
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
|
|
39
|
-
_op_type_ =
|
|
40
|
+
_op_type_ = opcodes.QUANTILE
|
|
40
41
|
|
|
41
42
|
input = KeyField("input", default=None)
|
|
42
43
|
q = AnyField("q", default=None)
|
|
@@ -80,7 +81,10 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
80
81
|
store_index_value = False
|
|
81
82
|
else:
|
|
82
83
|
q_val = np.asanyarray(self.q)
|
|
83
|
-
|
|
84
|
+
if q_val.ndim == 0:
|
|
85
|
+
pd_index = pd.Index(q_val.reshape(1))
|
|
86
|
+
else:
|
|
87
|
+
pd_index = pd.Index(q_val)
|
|
84
88
|
name = self.q if q_val.size == 1 else None
|
|
85
89
|
store_index_value = True
|
|
86
90
|
tokenize_objects = (a, q_val, self.interpolation, type(self).__name__)
|
|
@@ -163,7 +167,10 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
163
167
|
store_index_value = False
|
|
164
168
|
else:
|
|
165
169
|
q_val = np.asanyarray(self.q)
|
|
166
|
-
|
|
170
|
+
if q_val.ndim == 0:
|
|
171
|
+
index_val = pd.Index(q_val.reshape(1))
|
|
172
|
+
else:
|
|
173
|
+
index_val = pd.Index(q_val)
|
|
167
174
|
store_index_value = True
|
|
168
175
|
|
|
169
176
|
# get dtype by tensor
|
|
@@ -259,6 +266,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):
|
|
|
259
266
|
|
|
260
267
|
|
|
261
268
|
def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
|
|
269
|
+
# FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
|
|
262
270
|
"""
|
|
263
271
|
Return values at the given quantile over requested axis.
|
|
264
272
|
|
|
@@ -309,20 +317,6 @@ def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="line
|
|
|
309
317
|
a b
|
|
310
318
|
0.1 1.3 3.7
|
|
311
319
|
0.5 2.5 55.0
|
|
312
|
-
|
|
313
|
-
Specifying `numeric_only=False` will also compute the quantile of
|
|
314
|
-
datetime and timedelta data.
|
|
315
|
-
|
|
316
|
-
>>> df = md.DataFrame({'A': [1, 2],
|
|
317
|
-
... 'B': [md.Timestamp('2010'),
|
|
318
|
-
... md.Timestamp('2011')],
|
|
319
|
-
... 'C': [md.Timedelta('1 days'),
|
|
320
|
-
... md.Timedelta('2 days')]})
|
|
321
|
-
>>> df.quantile(0.5, numeric_only=False).execute()
|
|
322
|
-
A 1.5
|
|
323
|
-
B 2010-07-02 12:00:00
|
|
324
|
-
C 1 days 12:00:00
|
|
325
|
-
Name: 0.5, dtype: object
|
|
326
320
|
"""
|
|
327
321
|
if isinstance(q, ENTITY_TYPE):
|
|
328
322
|
q = astensor(q)
|
|
@@ -49,7 +49,7 @@ def test_dataframe_quantile():
|
|
|
49
49
|
|
|
50
50
|
# q = 0.3, axis = 0
|
|
51
51
|
r = s.quantile(0.3)
|
|
52
|
-
e = raw.quantile(0.3)
|
|
52
|
+
e = raw.quantile(0.3, numeric_only=True)
|
|
53
53
|
assert isinstance(r, Series)
|
|
54
54
|
assert r.shape == (2,)
|
|
55
55
|
assert r.dtype == e.dtype
|
|
@@ -57,7 +57,7 @@ def test_dataframe_quantile():
|
|
|
57
57
|
|
|
58
58
|
# q = 0.3, axis = 1
|
|
59
59
|
r = s.quantile(0.3, axis=1)
|
|
60
|
-
e = raw.quantile(0.3, axis=1)
|
|
60
|
+
e = raw.quantile(0.3, numeric_only=True, axis=1)
|
|
61
61
|
assert isinstance(r, Series)
|
|
62
62
|
assert r.shape == e.shape
|
|
63
63
|
assert r.dtype == e.dtype
|
|
@@ -65,7 +65,7 @@ def test_dataframe_quantile():
|
|
|
65
65
|
|
|
66
66
|
# q = [0.3, 0.7], axis = 0
|
|
67
67
|
r = s.quantile([0.3, 0.7])
|
|
68
|
-
e = raw.quantile([0.3, 0.7])
|
|
68
|
+
e = raw.quantile([0.3, 0.7], numeric_only=True)
|
|
69
69
|
assert isinstance(r, DataFrame)
|
|
70
70
|
assert r.shape == e.shape
|
|
71
71
|
pd.testing.assert_series_equal(r.dtypes, e.dtypes)
|
|
@@ -74,7 +74,7 @@ def test_dataframe_quantile():
|
|
|
74
74
|
|
|
75
75
|
# q = [0.3, 0.7], axis = 1
|
|
76
76
|
r = s.quantile([0.3, 0.7], axis=1)
|
|
77
|
-
e = raw.quantile([0.3, 0.7], axis=1)
|
|
77
|
+
e = raw.quantile([0.3, 0.7], numeric_only=True, axis=1)
|
|
78
78
|
assert isinstance(r, DataFrame)
|
|
79
79
|
assert r.shape == e.shape
|
|
80
80
|
pd.testing.assert_series_equal(r.dtypes, e.dtypes)
|
|
@@ -13,12 +13,13 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import pandas as pd
|
|
16
|
+
import pytest
|
|
16
17
|
|
|
17
18
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
18
|
-
from ..initializer import read_pandas
|
|
19
|
+
from ..initializer import DataFrame, Series, read_pandas
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
def test_from_pandas():
|
|
22
|
+
def test_read_pandas():
|
|
22
23
|
df_data = pd.DataFrame([["a", 1], ["b", 2]], columns=["a", "b"])
|
|
23
24
|
assert isinstance(read_pandas(df_data), DATAFRAME_TYPE)
|
|
24
25
|
|
|
@@ -27,3 +28,33 @@ def test_from_pandas():
|
|
|
27
28
|
|
|
28
29
|
idx_data = pd.Index(["a", "b"])
|
|
29
30
|
assert isinstance(read_pandas(idx_data), INDEX_TYPE)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_init_dataframe_from_maxframe_series():
|
|
34
|
+
s = Series([1, 2, 3, 4], index=[1, 2, 3, 4])
|
|
35
|
+
|
|
36
|
+
df = DataFrame(s, index=s.index, columns=["col1"])
|
|
37
|
+
|
|
38
|
+
assert isinstance(df, DATAFRAME_TYPE)
|
|
39
|
+
assert df.dtypes.index == ["col1"]
|
|
40
|
+
|
|
41
|
+
with pytest.raises(ValueError):
|
|
42
|
+
DataFrame(s, index=s.index, columns=[])
|
|
43
|
+
|
|
44
|
+
with pytest.raises(ValueError):
|
|
45
|
+
DataFrame(s, index=s.index, columns="col1")
|
|
46
|
+
|
|
47
|
+
with pytest.raises(ValueError):
|
|
48
|
+
DataFrame(s, index=s.index, columns="col2")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_init_dataframe_from_maxframe_dataframe():
|
|
52
|
+
df1 = DataFrame({"A": [1, 2, 3, 4], "B": [1, 2, 3, 4]}, index=[1, 2, 3, 4])
|
|
53
|
+
|
|
54
|
+
df2 = DataFrame(df1, index=df1.index, columns=["col1", "col2"])
|
|
55
|
+
|
|
56
|
+
assert isinstance(df2, DATAFRAME_TYPE)
|
|
57
|
+
assert list(df2.dtypes.index) == ["col1", "col2"]
|
|
58
|
+
|
|
59
|
+
with pytest.raises(ValueError):
|
|
60
|
+
DataFrame(df1, index=df1.index, columns=["col1", "col2", "col3"])
|
maxframe/dataframe/utils.py
CHANGED
|
@@ -26,7 +26,6 @@ import numpy as np
|
|
|
26
26
|
import pandas as pd
|
|
27
27
|
from pandas.api.extensions import ExtensionDtype
|
|
28
28
|
from pandas.api.types import is_string_dtype
|
|
29
|
-
from pandas.core.dtypes.cast import find_common_type
|
|
30
29
|
from pandas.core.dtypes.inference import is_dict_like, is_list_like
|
|
31
30
|
|
|
32
31
|
from ..core import Entity, ExecutableTuple
|
|
@@ -264,12 +263,30 @@ def parse_index(index_value, *args, store_data=False, key=None):
|
|
|
264
263
|
return IndexValue(_index_value=_serialize_index(index_value))
|
|
265
264
|
|
|
266
265
|
|
|
267
|
-
def gen_unknown_index_value(index_value, *args):
|
|
266
|
+
def gen_unknown_index_value(index_value, *args, normalize_range_index=False):
|
|
267
|
+
"""
|
|
268
|
+
Generate new index value with the same likes of given index_value and args, but without any value.
|
|
269
|
+
|
|
270
|
+
Parameters
|
|
271
|
+
----------
|
|
272
|
+
index_value
|
|
273
|
+
Given index value.
|
|
274
|
+
args
|
|
275
|
+
Arguments for parse_index.
|
|
276
|
+
normalize_range_index
|
|
277
|
+
If normalize range index to normal index.
|
|
278
|
+
|
|
279
|
+
Returns
|
|
280
|
+
-------
|
|
281
|
+
New created range index value.
|
|
282
|
+
"""
|
|
268
283
|
pd_index = index_value.to_pandas()
|
|
269
|
-
if isinstance(pd_index, pd.RangeIndex):
|
|
270
|
-
return parse_index(pd.RangeIndex(-1), *args)
|
|
284
|
+
if not normalize_range_index and isinstance(pd_index, pd.RangeIndex):
|
|
285
|
+
return parse_index(pd.RangeIndex(-1, name=pd_index.name), *args)
|
|
271
286
|
elif not isinstance(pd_index, pd.MultiIndex):
|
|
272
|
-
return parse_index(pd.Index([], dtype=pd_index.dtype), *args)
|
|
287
|
+
return parse_index(
|
|
288
|
+
pd.Index([], dtype=pd_index.dtype, name=pd_index.name), *args
|
|
289
|
+
)
|
|
273
290
|
else:
|
|
274
291
|
i = pd.MultiIndex.from_arrays(
|
|
275
292
|
[c[:0] for c in pd_index.levels], names=pd_index.names
|
|
@@ -477,11 +494,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
|
|
|
477
494
|
else:
|
|
478
495
|
fill_values = fill_value
|
|
479
496
|
|
|
480
|
-
from .core import SERIES_TYPE
|
|
497
|
+
from .core import INDEX_TYPE, SERIES_TYPE
|
|
481
498
|
|
|
482
499
|
dtypes = (
|
|
483
500
|
pd.Series([df_obj.dtype], index=[df_obj.name])
|
|
484
|
-
if isinstance(df_obj, SERIES_TYPE)
|
|
501
|
+
if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
|
|
485
502
|
else df_obj.dtypes
|
|
486
503
|
)
|
|
487
504
|
for size, fill_value in zip(sizes, fill_values):
|
|
@@ -593,7 +610,7 @@ def build_series(
|
|
|
593
610
|
return ret_series
|
|
594
611
|
|
|
595
612
|
|
|
596
|
-
def infer_index_value(left_index_value, right_index_value):
|
|
613
|
+
def infer_index_value(left_index_value, right_index_value, level=None):
|
|
597
614
|
from .core import IndexValue
|
|
598
615
|
|
|
599
616
|
if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
|
|
@@ -616,9 +633,7 @@ def infer_index_value(left_index_value, right_index_value):
|
|
|
616
633
|
|
|
617
634
|
left_index = left_index_value.to_pandas()
|
|
618
635
|
right_index = right_index_value.to_pandas()
|
|
619
|
-
out_index = pd.Index(
|
|
620
|
-
[], dtype=find_common_type([left_index.dtype, right_index.dtype])
|
|
621
|
-
)
|
|
636
|
+
out_index = left_index.join(right_index, level=level)[:0]
|
|
622
637
|
return parse_index(out_index, left_index_value, right_index_value)
|
|
623
638
|
|
|
624
639
|
|
|
@@ -28,6 +28,7 @@ from .aggregation import BaseDataFrameExpandingAgg
|
|
|
28
28
|
from .core import Window
|
|
29
29
|
|
|
30
30
|
_window_has_method = pd_release_version >= (1, 3, 0)
|
|
31
|
+
_window_has_center = pd_release_version < (2, 0, 0)
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class DataFrameExpandingAgg(BaseDataFrameExpandingAgg):
|
|
@@ -49,10 +50,11 @@ class Expanding(Window):
|
|
|
49
50
|
def params(self):
|
|
50
51
|
p = OrderedDict()
|
|
51
52
|
|
|
53
|
+
args = ["min_periods", "center", "axis", "method"]
|
|
52
54
|
if not _window_has_method: # pragma: no cover
|
|
53
|
-
args = [
|
|
54
|
-
|
|
55
|
-
args = [
|
|
55
|
+
args = [a for a in args if a != "method"]
|
|
56
|
+
if not _window_has_center:
|
|
57
|
+
args = [a for a in args if a != "center"]
|
|
56
58
|
|
|
57
59
|
for k in args:
|
|
58
60
|
p[k] = getattr(self, k)
|