polars-runtime-compat 1.34.0b2__cp39-abi3-win_arm64.whl → 1.34.0b4__cp39-abi3-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -96
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/lazyframe/group_by.py
DELETED
|
@@ -1,669 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING, Callable
|
|
4
|
-
|
|
5
|
-
from polars import functions as F
|
|
6
|
-
from polars._utils.deprecation import deprecated
|
|
7
|
-
from polars._utils.parse import parse_into_list_of_expressions
|
|
8
|
-
from polars._utils.wrap import wrap_df, wrap_ldf
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
import sys
|
|
12
|
-
from collections.abc import Iterable
|
|
13
|
-
|
|
14
|
-
from polars import DataFrame, LazyFrame
|
|
15
|
-
from polars._plr import PyLazyGroupBy
|
|
16
|
-
from polars._typing import IntoExpr, QuantileMethod, SchemaDict
|
|
17
|
-
|
|
18
|
-
if sys.version_info >= (3, 13):
|
|
19
|
-
from warnings import deprecated
|
|
20
|
-
else:
|
|
21
|
-
from typing_extensions import deprecated # noqa: TC004
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class LazyGroupBy:
    """
    Pending group by operation on a lazy DataFrame.

    Instances are obtained from `df.lazy().group_by(...)`. Every public method
    delegates to the wrapped `PyLazyGroupBy` object (directly or through
    :meth:`agg`) and hands back a `LazyFrame`.
    """

    def __init__(self, lgb: PyLazyGroupBy) -> None:
        # Handle to the underlying group-by object that all methods forward to.
        self.lgb = lgb

    def agg(
        self,
        *aggs: IntoExpr | Iterable[IntoExpr],
        **named_aggs: IntoExpr,
    ) -> LazyFrame:
        """
        Compute aggregations for each group of a group by operation.

        Parameters
        ----------
        *aggs
            Aggregations to compute for each group of the group by operation,
            given positionally. Accepts expression input; strings are parsed as
            column names.
        **named_aggs
            Further aggregations given as keyword arguments. Each resulting
            column is renamed to the keyword used.

        Raises
        ------
        TypeError
            If the first positional argument is a dictionary.

        Examples
        --------
        Compute the aggregation of the columns for each group.

        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": ["a", "b", "a", "b", "c"],
        ...         "b": [1, 2, 1, 3, 3],
        ...         "c": [5, 4, 3, 2, 1],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("a").agg(
        ...     [pl.col("b"), pl.col("c")]
        ... ).collect()  # doctest: +IGNORE_RESULT
        shape: (3, 3)
        ┌─────┬───────────┬───────────┐
        │ a   ┆ b         ┆ c         │
        │ --- ┆ ---       ┆ ---       │
        │ str ┆ list[i64] ┆ list[i64] │
        ╞═════╪═══════════╪═══════════╡
        │ a   ┆ [1, 1]    ┆ [5, 3]    │
        ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
        │ b   ┆ [2, 3]    ┆ [4, 2]    │
        ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
        │ c   ┆ [3]       ┆ [1]       │
        └─────┴───────────┴───────────┘

        Compute the sum of a column for each group.

        >>> ldf.group_by("a").agg(
        ...     pl.col("b").sum()
        ... ).collect()  # doctest: +IGNORE_RESULT
        shape: (3, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ str ┆ i64 │
        ╞═════╪═════╡
        │ a   ┆ 2   │
        │ b   ┆ 5   │
        │ c   ┆ 3   │
        └─────┴─────┘

        Compute multiple aggregates at once by passing a list of expressions.

        >>> ldf.group_by("a").agg(
        ...     [pl.sum("b"), pl.mean("c")]
        ... ).collect()  # doctest: +IGNORE_RESULT
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ a   ┆ b   ┆ c   │
        │ --- ┆ --- ┆ --- │
        │ str ┆ i64 ┆ f64 │
        ╞═════╪═════╪═════╡
        │ c   ┆ 3   ┆ 1.0 │
        │ a   ┆ 2   ┆ 4.0 │
        │ b   ┆ 5   ┆ 3.0 │
        └─────┴─────┴─────┘

        Or use positional arguments to compute multiple aggregations in the same way.

        >>> ldf.group_by("a").agg(
        ...     pl.sum("b").name.suffix("_sum"),
        ...     (pl.col("c") ** 2).mean().name.suffix("_mean_squared"),
        ... ).collect()  # doctest: +IGNORE_RESULT
        shape: (3, 3)
        ┌─────┬───────┬────────────────┐
        │ a   ┆ b_sum ┆ c_mean_squared │
        │ --- ┆ ---   ┆ ---            │
        │ str ┆ i64   ┆ f64            │
        ╞═════╪═══════╪════════════════╡
        │ a   ┆ 2     ┆ 17.0           │
        │ c   ┆ 3     ┆ 1.0            │
        │ b   ┆ 5     ┆ 10.0           │
        └─────┴───────┴────────────────┘

        Use keyword arguments to easily name your expression inputs.

        >>> ldf.group_by("a").agg(
        ...     b_sum=pl.sum("b"),
        ...     c_mean_squared=(pl.col("c") ** 2).mean(),
        ... ).collect()  # doctest: +IGNORE_RESULT
        shape: (3, 3)
        ┌─────┬───────┬────────────────┐
        │ a   ┆ b_sum ┆ c_mean_squared │
        │ --- ┆ ---   ┆ ---            │
        │ str ┆ i64   ┆ f64            │
        ╞═════╪═══════╪════════════════╡
        │ a   ┆ 2     ┆ 17.0           │
        │ c   ┆ 3     ┆ 1.0            │
        │ b   ┆ 5     ┆ 10.0           │
        └─────┴───────┴────────────────┘
        """
        # Only the first positional argument is inspected: a dict there is
        # rejected up front with a hint to unpack it into keyword arguments.
        dict_given_first = bool(aggs) and isinstance(aggs[0], dict)
        if dict_given_first:
            msg = (
                "specifying aggregations as a dictionary is not supported"
                "\n\nTry unpacking the dictionary to take advantage of the keyword syntax"
                " of the `agg` method."
            )
            raise TypeError(msg)

        parsed_exprs = parse_into_list_of_expressions(*aggs, **named_aggs)
        aggregated = self.lgb.agg(parsed_exprs)
        return wrap_ldf(aggregated)

    def map_groups(
        self,
        function: Callable[[DataFrame], DataFrame],
        schema: SchemaDict | None,
    ) -> LazyFrame:
        """
        Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

        .. warning::
            This method is much slower than the native expressions API.
            Only use it if you cannot implement your logic otherwise.

        Using this is considered an anti-pattern as it will be very slow because:

        - it forces the engine to materialize the whole `DataFrames` for the groups.
        - it is not parallelized
        - it blocks optimizations as the passed python function is opaque to the
          optimizer

        The idiomatic way to apply custom functions over multiple columns is using:

        `pl.struct([my_columns]).apply(lambda struct_series: ..)`

        Parameters
        ----------
        function
            Function to apply over each group of the `LazyFrame`.
        schema
            Schema of the output function. This has to be known statically. If the
            given schema is incorrect, this is a bug in the caller's query and may
            lead to errors. If set to None, polars assumes the schema is unchanged.

        Examples
        --------
        For each color group sample two rows:

        >>> df = pl.DataFrame(
        ...     {
        ...         "id": [0, 1, 2, 3, 4],
        ...         "color": ["red", "green", "green", "red", "red"],
        ...         "shape": ["square", "triangle", "square", "triangle", "square"],
        ...     }
        ... )
        >>> (
        ...     df.lazy()
        ...     .group_by("color")
        ...     .map_groups(lambda group_df: group_df.sample(2), schema=None)
        ...     .collect()
        ... )  # doctest: +IGNORE_RESULT
        shape: (4, 3)
        ┌─────┬───────┬──────────┐
        │ id  ┆ color ┆ shape    │
        │ --- ┆ ---   ┆ ---      │
        │ i64 ┆ str   ┆ str      │
        ╞═════╪═══════╪══════════╡
        │ 1   ┆ green ┆ triangle │
        │ 2   ┆ green ┆ square   │
        │ 4   ┆ red   ┆ square   │
        │ 3   ┆ red   ┆ triangle │
        └─────┴───────┴──────────┘

        It is better to implement this with an expression:

        >>> df.lazy().filter(
        ...     pl.int_range(pl.len()).shuffle().over("color") < 2
        ... ).collect()  # doctest: +IGNORE_RESULT
        """
        # NOTE(review): the `.apply(...)` hint in the docstring may be an outdated
        # method name -- confirm against the current Expr API before relying on it.

        def _run_on_group(raw_group):  # noqa: ANN001, ANN202
            # Wrap the per-group frame for the user function, then pass the
            # result's underlying `_df` back to the group-by object.
            return function(wrap_df(raw_group))._df

        mapped = self.lgb.map_groups(_run_on_group, schema)
        return wrap_ldf(mapped)

    def head(self, n: int = 5) -> LazyFrame:
        """
        Get the first `n` rows of each group.

        Parameters
        ----------
        n
            Number of rows to return.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "letters": ["c", "c", "a", "c", "a", "b"],
        ...         "nrs": [1, 2, 3, 4, 5, 6],
        ...     }
        ... )
        >>> df.group_by("letters").head(2).sort("letters")
        shape: (5, 2)
        ┌─────────┬─────┐
        │ letters ┆ nrs │
        │ ---     ┆ --- │
        │ str     ┆ i64 │
        ╞═════════╪═════╡
        │ a       ┆ 3   │
        │ a       ┆ 5   │
        │ b       ┆ 6   │
        │ c       ┆ 1   │
        │ c       ┆ 2   │
        └─────────┴─────┘
        """
        leading_rows = self.lgb.head(n)
        return wrap_ldf(leading_rows)

    def tail(self, n: int = 5) -> LazyFrame:
        """
        Get the last `n` rows of each group.

        Parameters
        ----------
        n
            Number of rows to return.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "letters": ["c", "c", "a", "c", "a", "b"],
        ...         "nrs": [1, 2, 3, 4, 5, 6],
        ...     }
        ... )
        >>> df.group_by("letters").tail(2).sort("letters")
        shape: (5, 2)
        ┌─────────┬─────┐
        │ letters ┆ nrs │
        │ ---     ┆ --- │
        │ str     ┆ i64 │
        ╞═════════╪═════╡
        │ a       ┆ 3   │
        │ a       ┆ 5   │
        │ b       ┆ 6   │
        │ c       ┆ 2   │
        │ c       ┆ 4   │
        └─────────┴─────┘
        """
        trailing_rows = self.lgb.tail(n)
        return wrap_ldf(trailing_rows)

    def all(self) -> LazyFrame:
        """
        Aggregate the groups into Series.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": ["one", "two", "one", "two"],
        ...         "b": [1, 2, 3, 4],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("a", maintain_order=True).all().collect()
        shape: (2, 2)
        ┌─────┬───────────┐
        │ a   ┆ b         │
        │ --- ┆ ---       │
        │ str ┆ list[i64] │
        ╞═════╪═══════════╡
        │ one ┆ [1, 3]    │
        │ two ┆ [2, 4]    │
        └─────┴───────────┘
        """
        every_column = F.all()
        return self.agg(every_column)

    def len(self, name: str | None = None) -> LazyFrame:
        """
        Return the number of rows in each group.

        Parameters
        ----------
        name
            Assign a name to the resulting column; if unset, defaults to "len".

        Examples
        --------
        >>> lf = pl.LazyFrame({"a": ["Apple", "Apple", "Orange"], "b": [1, None, 2]})
        >>> lf.group_by("a").len().collect()  # doctest: +IGNORE_RESULT
        shape: (2, 2)
        ┌────────┬─────┐
        │ a      ┆ len │
        │ ---    ┆ --- │
        │ str    ┆ u32 │
        ╞════════╪═════╡
        │ Apple  ┆ 2   │
        │ Orange ┆ 1   │
        └────────┴─────┘
        >>> lf.group_by("a").len(name="n").collect()  # doctest: +IGNORE_RESULT
        shape: (2, 2)
        ┌────────┬─────┐
        │ a      ┆ n   │
        │ ---    ┆ --- │
        │ str    ┆ u32 │
        ╞════════╪═════╡
        │ Apple  ┆ 2   │
        │ Orange ┆ 1   │
        └────────┴─────┘
        """
        row_count = F.len()
        # Only alias when the caller asked for a specific column name.
        return self.agg(row_count if name is None else row_count.alias(name))

    @deprecated("`count` was renamed; use `len` instead")
    def count(self) -> LazyFrame:
        """
        Return the number of rows in each group.

        .. deprecated:: 0.20.5
            This method has been renamed to :func:`LazyGroupBy.len`.

        Rows containing null values count towards the total.

        Examples
        --------
        >>> lf = pl.LazyFrame(
        ...     {
        ...         "a": ["Apple", "Apple", "Orange"],
        ...         "b": [1, None, 2],
        ...     }
        ... )
        >>> lf.group_by("a").count().collect()  # doctest: +SKIP
        shape: (2, 2)
        ┌────────┬───────┐
        │ a      ┆ count │
        │ ---    ┆ ---   │
        │ str    ┆ u32   │
        ╞════════╪═══════╡
        │ Apple  ┆ 2     │
        │ Orange ┆ 1     │
        └────────┴───────┘
        """
        # Same expression as `len`, but the output column keeps the old name.
        legacy_count = F.len().alias("count")
        return self.agg(legacy_count)

    def first(self) -> LazyFrame:
        """
        Aggregate the first values in the group.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).first().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────┬───────┐
        │ d      ┆ a   ┆ b    ┆ c     │
        │ ---    ┆ --- ┆ ---  ┆ ---   │
        │ str    ┆ i64 ┆ f64  ┆ bool  │
        ╞════════╪═════╪══════╪═══════╡
        │ Apple  ┆ 1   ┆ 0.5  ┆ true  │
        │ Orange ┆ 2   ┆ 0.5  ┆ true  │
        │ Banana ┆ 4   ┆ 13.0 ┆ false │
        └────────┴─────┴──────┴───────┘
        """
        first_of_each = F.all().first()
        return self.agg(first_of_each)

    def last(self) -> LazyFrame:
        """
        Aggregate the last values in the group.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 14, 13],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).last().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────┬───────┐
        │ d      ┆ a   ┆ b    ┆ c     │
        │ ---    ┆ --- ┆ ---  ┆ ---   │
        │ str    ┆ i64 ┆ f64  ┆ bool  │
        ╞════════╪═════╪══════╪═══════╡
        │ Apple  ┆ 3   ┆ 10.0 ┆ false │
        │ Orange ┆ 2   ┆ 0.5  ┆ true  │
        │ Banana ┆ 5   ┆ 13.0 ┆ true  │
        └────────┴─────┴──────┴───────┘
        """
        last_of_each = F.all().last()
        return self.agg(last_of_each)

    def max(self) -> LazyFrame:
        """
        Reduce the groups to the maximal value.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).max().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────┬──────┐
        │ d      ┆ a   ┆ b    ┆ c    │
        │ ---    ┆ --- ┆ ---  ┆ ---  │
        │ str    ┆ i64 ┆ f64  ┆ bool │
        ╞════════╪═════╪══════╪══════╡
        │ Apple  ┆ 3   ┆ 10.0 ┆ true │
        │ Orange ┆ 2   ┆ 0.5  ┆ true │
        │ Banana ┆ 5   ┆ 14.0 ┆ true │
        └────────┴─────┴──────┴──────┘
        """
        largest = F.all().max()
        return self.agg(largest)

    def mean(self) -> LazyFrame:
        """
        Reduce the groups to the mean values.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).mean().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────────┬──────────┐
        │ d      ┆ a   ┆ b        ┆ c        │
        │ ---    ┆ --- ┆ ---      ┆ ---      │
        │ str    ┆ f64 ┆ f64      ┆ f64      │
        ╞════════╪═════╪══════════╪══════════╡
        │ Apple  ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
        │ Orange ┆ 2.0 ┆ 0.5      ┆ 1.0      │
        │ Banana ┆ 4.5 ┆ 13.5     ┆ 0.5      │
        └────────┴─────┴──────────┴──────────┘
        """
        averages = F.all().mean()
        return self.agg(averages)

    def median(self) -> LazyFrame:
        """
        Return the median per group.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).median().collect()
        shape: (2, 3)
        ┌────────┬─────┬──────┐
        │ d      ┆ a   ┆ b    │
        │ ---    ┆ --- ┆ ---  │
        │ str    ┆ f64 ┆ f64  │
        ╞════════╪═════╪══════╡
        │ Apple  ┆ 2.0 ┆ 4.0  │
        │ Banana ┆ 4.0 ┆ 13.0 │
        └────────┴─────┴──────┘
        """
        medians = F.all().median()
        return self.agg(medians)

    def min(self) -> LazyFrame:
        """
        Reduce the groups to the minimal value.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).min().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────┬───────┐
        │ d      ┆ a   ┆ b    ┆ c     │
        │ ---    ┆ --- ┆ ---  ┆ ---   │
        │ str    ┆ i64 ┆ f64  ┆ bool  │
        ╞════════╪═════╪══════╪═══════╡
        │ Apple  ┆ 1   ┆ 0.5  ┆ false │
        │ Orange ┆ 2   ┆ 0.5  ┆ true  │
        │ Banana ┆ 4   ┆ 13.0 ┆ false │
        └────────┴─────┴──────┴───────┘
        """
        smallest = F.all().min()
        return self.agg(smallest)

    def n_unique(self) -> LazyFrame:
        """
        Count the unique values per group.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 1, 3, 4, 5],
        ...         "b": [0.5, 0.5, 0.5, 10, 13, 14],
        ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).n_unique().collect()
        shape: (2, 3)
        ┌────────┬─────┬─────┐
        │ d      ┆ a   ┆ b   │
        │ ---    ┆ --- ┆ --- │
        │ str    ┆ u32 ┆ u32 │
        ╞════════╪═════╪═════╡
        │ Apple  ┆ 2   ┆ 2   │
        │ Banana ┆ 3   ┆ 3   │
        └────────┴─────┴─────┘
        """
        distinct_counts = F.all().n_unique()
        return self.agg(distinct_counts)

    def quantile(
        self, quantile: float, interpolation: QuantileMethod = "nearest"
    ) -> LazyFrame:
        """
        Compute the quantile per group.

        Parameters
        ----------
        quantile
            Quantile between 0.0 and 1.0.
        interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
            Interpolation method.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).quantile(1).collect()
        shape: (3, 3)
        ┌────────┬─────┬──────┐
        │ d      ┆ a   ┆ b    │
        │ ---    ┆ --- ┆ ---  │
        │ str    ┆ f64 ┆ f64  │
        ╞════════╪═════╪══════╡
        │ Apple  ┆ 3.0 ┆ 10.0 │
        │ Orange ┆ 2.0 ┆ 0.5  │
        │ Banana ┆ 5.0 ┆ 14.0 │
        └────────┴─────┴──────┘
        """  # noqa: W505
        per_column_quantile = F.all().quantile(quantile, interpolation=interpolation)
        return self.agg(per_column_quantile)

    def sum(self) -> LazyFrame:
        """
        Reduce the groups to the sum.

        Examples
        --------
        >>> ldf = pl.DataFrame(
        ...     {
        ...         "a": [1, 2, 2, 3, 4, 5],
        ...         "b": [0.5, 0.5, 4, 10, 13, 14],
        ...         "c": [True, True, True, False, False, True],
        ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
        ...     }
        ... ).lazy()
        >>> ldf.group_by("d", maintain_order=True).sum().collect()
        shape: (3, 4)
        ┌────────┬─────┬──────┬─────┐
        │ d      ┆ a   ┆ b    ┆ c   │
        │ ---    ┆ --- ┆ ---  ┆ --- │
        │ str    ┆ i64 ┆ f64  ┆ u32 │
        ╞════════╪═════╪══════╪═════╡
        │ Apple  ┆ 6   ┆ 14.5 ┆ 2   │
        │ Orange ┆ 2   ┆ 0.5  ┆ 1   │
        │ Banana ┆ 9   ┆ 27.0 ┆ 1   │
        └────────┴─────┴──────┴─────┘
        """
        totals = F.all().sum()
        return self.agg(totals)
|