polars-runtime-compat 1.34.0b3__cp39-abi3-win_amd64.whl → 1.34.0b4__cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,1067 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, Any, Callable
4
-
5
- from polars import functions as F
6
- from polars._utils.convert import parse_as_duration_string
7
- from polars._utils.deprecation import deprecated
8
-
9
- if TYPE_CHECKING:
10
- import sys
11
- from collections.abc import Iterable
12
- from datetime import timedelta
13
-
14
- from polars import DataFrame
15
- from polars._typing import (
16
- ClosedInterval,
17
- IntoExpr,
18
- Label,
19
- QuantileMethod,
20
- SchemaDict,
21
- StartBy,
22
- )
23
-
24
- if sys.version_info >= (3, 11):
25
- from typing import Self
26
- else:
27
- from typing_extensions import Self
28
-
29
- if sys.version_info >= (3, 13):
30
- from warnings import deprecated
31
- else:
32
- from typing_extensions import deprecated # noqa: TC004
33
-
34
-
35
- class GroupBy:
36
- """Starts a new GroupBy operation."""
37
-
38
- def __init__(
39
- self,
40
- df: DataFrame,
41
- *by: IntoExpr | Iterable[IntoExpr],
42
- maintain_order: bool,
43
- **named_by: IntoExpr,
44
- ) -> None:
45
- """
46
- Utility class for performing a group by operation over the given DataFrame.
47
-
48
- Generated by calling `df.group_by(...)`.
49
-
50
- Parameters
51
- ----------
52
- df
53
- DataFrame to perform the group by operation over.
54
- *by
55
- Column or columns to group by. Accepts expression input. Strings are parsed
56
- as column names.
57
- maintain_order
58
- Ensure that the order of the groups is consistent with the input data.
59
- This is slower than a default group by.
60
- **named_by
61
- Additional column(s) to group by, specified as keyword arguments.
62
- The columns will be named as the keyword used.
63
- """
64
- self.df = df
65
- self.by = by
66
- self.named_by = named_by
67
- self.maintain_order = maintain_order
68
-
69
- def __iter__(self) -> Self:
70
- """
71
- Allows iteration over the groups of the group by operation.
72
-
73
- Each group is represented by a tuple of `(name, data)`. The group names are
74
- tuples of the distinct group values that identify each group.
75
-
76
- Examples
77
- --------
78
- >>> df = pl.DataFrame({"foo": ["a", "a", "b"], "bar": [1, 2, 3]})
79
- >>> for name, data in df.group_by("foo"): # doctest: +SKIP
80
- ... print(name)
81
- ... print(data)
82
- (a,)
83
- shape: (2, 2)
84
- ┌─────┬─────┐
85
- │ foo ┆ bar │
86
- │ --- ┆ --- │
87
- │ str ┆ i64 │
88
- ╞═════╪═════╡
89
- │ a ┆ 1 │
90
- │ a ┆ 2 │
91
- └─────┴─────┘
92
- (b,)
93
- shape: (1, 2)
94
- ┌─────┬─────┐
95
- │ foo ┆ bar │
96
- │ --- ┆ --- │
97
- │ str ┆ i64 │
98
- ╞═════╪═════╡
99
- │ b ┆ 3 │
100
- └─────┴─────┘
101
- """
102
- # Every group gather can trigger a rechunk, so do early.
103
- from polars.lazyframe.opt_flags import QueryOptFlags
104
-
105
- self.df = self.df.rechunk()
106
- temp_col = "__POLARS_GB_GROUP_INDICES"
107
- groups_df = (
108
- self.df.lazy()
109
- .group_by(*self.by, **self.named_by, maintain_order=self.maintain_order)
110
- .agg(F.first().agg_groups().alias(temp_col))
111
- .collect(optimizations=QueryOptFlags.none())
112
- )
113
-
114
- self._group_names = groups_df.select(F.all().exclude(temp_col)).iter_rows()
115
- self._group_indices = groups_df.select(temp_col).to_series()
116
- self._current_index = 0
117
-
118
- return self
119
-
120
- def __next__(self) -> tuple[tuple[Any, ...], DataFrame]:
121
- if self._current_index >= len(self._group_indices):
122
- raise StopIteration
123
-
124
- group_name = next(self._group_names)
125
- group_data = self.df[self._group_indices[self._current_index], :]
126
- self._current_index += 1
127
-
128
- return group_name, group_data
129
-
130
- def agg(
131
- self,
132
- *aggs: IntoExpr | Iterable[IntoExpr],
133
- **named_aggs: IntoExpr,
134
- ) -> DataFrame:
135
- """
136
- Compute aggregations for each group of a group by operation.
137
-
138
- Parameters
139
- ----------
140
- *aggs
141
- Aggregations to compute for each group of the group by operation,
142
- specified as positional arguments.
143
- Accepts expression input. Strings are parsed as column names.
144
- **named_aggs
145
- Additional aggregations, specified as keyword arguments.
146
- The resulting columns will be renamed to the keyword used.
147
-
148
- Examples
149
- --------
150
- Compute the aggregation of the columns for each group.
151
-
152
- >>> df = pl.DataFrame(
153
- ... {
154
- ... "a": ["a", "b", "a", "b", "c"],
155
- ... "b": [1, 2, 1, 3, 3],
156
- ... "c": [5, 4, 3, 2, 1],
157
- ... }
158
- ... )
159
- >>> df.group_by("a").agg(pl.col("b"), pl.col("c")) # doctest: +IGNORE_RESULT
160
- shape: (3, 3)
161
- ┌─────┬───────────┬───────────┐
162
- │ a ┆ b ┆ c │
163
- │ --- ┆ --- ┆ --- │
164
- │ str ┆ list[i64] ┆ list[i64] │
165
- ╞═════╪═══════════╪═══════════╡
166
- │ a ┆ [1, 1] ┆ [5, 3] │
167
- ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
168
- │ b ┆ [2, 3] ┆ [4, 2] │
169
- ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
170
- │ c ┆ [3] ┆ [1] │
171
- └─────┴───────────┴───────────┘
172
-
173
- Compute the sum of a column for each group.
174
-
175
- >>> df.group_by("a").agg(pl.col("b").sum()) # doctest: +IGNORE_RESULT
176
- shape: (3, 2)
177
- ┌─────┬─────┐
178
- │ a ┆ b │
179
- │ --- ┆ --- │
180
- │ str ┆ i64 │
181
- ╞═════╪═════╡
182
- │ a ┆ 2 │
183
- │ b ┆ 5 │
184
- │ c ┆ 3 │
185
- └─────┴─────┘
186
-
187
- Compute multiple aggregates at once by passing a list of expressions.
188
-
189
- >>> df.group_by("a").agg([pl.sum("b"), pl.mean("c")]) # doctest: +IGNORE_RESULT
190
- shape: (3, 3)
191
- ┌─────┬─────┬─────┐
192
- │ a ┆ b ┆ c │
193
- │ --- ┆ --- ┆ --- │
194
- │ str ┆ i64 ┆ f64 │
195
- ╞═════╪═════╪═════╡
196
- │ c ┆ 3 ┆ 1.0 │
197
- │ a ┆ 2 ┆ 4.0 │
198
- │ b ┆ 5 ┆ 3.0 │
199
- └─────┴─────┴─────┘
200
-
201
- Or use positional arguments to compute multiple aggregations in the same way.
202
-
203
- >>> df.group_by("a").agg(
204
- ... pl.sum("b").name.suffix("_sum"),
205
- ... (pl.col("c") ** 2).mean().name.suffix("_mean_squared"),
206
- ... ) # doctest: +IGNORE_RESULT
207
- shape: (3, 3)
208
- ┌─────┬───────┬────────────────┐
209
- │ a ┆ b_sum ┆ c_mean_squared │
210
- │ --- ┆ --- ┆ --- │
211
- │ str ┆ i64 ┆ f64 │
212
- ╞═════╪═══════╪════════════════╡
213
- │ a ┆ 2 ┆ 17.0 │
214
- │ c ┆ 3 ┆ 1.0 │
215
- │ b ┆ 5 ┆ 10.0 │
216
- └─────┴───────┴────────────────┘
217
-
218
- Use keyword arguments to easily name your expression inputs.
219
-
220
- >>> df.group_by("a").agg(
221
- ... b_sum=pl.sum("b"),
222
- ... c_mean_squared=(pl.col("c") ** 2).mean(),
223
- ... ) # doctest: +IGNORE_RESULT
224
- shape: (3, 3)
225
- ┌─────┬───────┬────────────────┐
226
- │ a ┆ b_sum ┆ c_mean_squared │
227
- │ --- ┆ --- ┆ --- │
228
- │ str ┆ i64 ┆ f64 │
229
- ╞═════╪═══════╪════════════════╡
230
- │ a ┆ 2 ┆ 17.0 │
231
- │ c ┆ 3 ┆ 1.0 │
232
- │ b ┆ 5 ┆ 10.0 │
233
- └─────┴───────┴────────────────┘
234
- """
235
- from polars.lazyframe.opt_flags import QueryOptFlags
236
-
237
- return (
238
- self.df.lazy()
239
- .group_by(*self.by, **self.named_by, maintain_order=self.maintain_order)
240
- .agg(*aggs, **named_aggs)
241
- .collect(optimizations=QueryOptFlags.none())
242
- )
243
-
244
- def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
245
- """
246
- Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
247
-
248
- .. warning::
249
- This method is much slower than the native expressions API.
250
- Only use it if you cannot implement your logic otherwise.
251
-
252
- Implementing logic using a Python function is almost always *significantly*
253
- slower and more memory intensive than implementing the same logic using
254
- the native expression API because:
255
-
256
- - The native expression engine runs in Rust; UDFs run in Python.
257
- - Use of Python UDFs forces the DataFrame to be materialized in memory.
258
- - Polars-native expressions can be parallelised (UDFs cannot).
259
- - Polars-native expressions can be logically optimised (UDFs cannot).
260
-
261
- Wherever possible you should strongly prefer the native expression API
262
- to achieve the best performance.
263
-
264
- Parameters
265
- ----------
266
- function
267
- Custom function that receives a DataFrame and returns a DataFrame.
268
-
269
- Returns
270
- -------
271
- DataFrame
272
-
273
- Examples
274
- --------
275
- For each color group sample two rows:
276
-
277
- >>> df = pl.DataFrame(
278
- ... {
279
- ... "id": [0, 1, 2, 3, 4],
280
- ... "color": ["red", "green", "green", "red", "red"],
281
- ... "shape": ["square", "triangle", "square", "triangle", "square"],
282
- ... }
283
- ... )
284
- >>> df.group_by("color").map_groups(
285
- ... lambda group_df: group_df.sample(2)
286
- ... ) # doctest: +IGNORE_RESULT
287
- shape: (4, 3)
288
- ┌─────┬───────┬──────────┐
289
- │ id ┆ color ┆ shape │
290
- │ --- ┆ --- ┆ --- │
291
- │ i64 ┆ str ┆ str │
292
- ╞═════╪═══════╪══════════╡
293
- │ 1 ┆ green ┆ triangle │
294
- │ 2 ┆ green ┆ square │
295
- │ 4 ┆ red ┆ square │
296
- │ 3 ┆ red ┆ triangle │
297
- └─────┴───────┴──────────┘
298
-
299
- It is better to implement this with an expression:
300
-
301
- >>> df.filter(
302
- ... pl.int_range(pl.len()).shuffle().over("color") < 2
303
- ... ) # doctest: +IGNORE_RESULT
304
- """
305
- if self.named_by:
306
- msg = "cannot call `map_groups` when grouping by named expressions"
307
- raise TypeError(msg)
308
- if not all(isinstance(c, str) for c in self.by):
309
- msg = "cannot call `map_groups` when grouping by an expression"
310
- raise TypeError(msg)
311
-
312
- by_strs: list[str] = self.by # type: ignore[assignment]
313
-
314
- return self.df.__class__._from_pydf(
315
- self.df._df.group_by_map_groups(by_strs, function, self.maintain_order)
316
- )
317
-
318
- def head(self, n: int = 5) -> DataFrame:
319
- """
320
- Get the first `n` rows of each group.
321
-
322
- Parameters
323
- ----------
324
- n
325
- Number of rows to return.
326
-
327
- Examples
328
- --------
329
- >>> df = pl.DataFrame(
330
- ... {
331
- ... "letters": ["c", "c", "a", "c", "a", "b"],
332
- ... "nrs": [1, 2, 3, 4, 5, 6],
333
- ... }
334
- ... )
335
- >>> df
336
- shape: (6, 2)
337
- ┌─────────┬─────┐
338
- │ letters ┆ nrs │
339
- │ --- ┆ --- │
340
- │ str ┆ i64 │
341
- ╞═════════╪═════╡
342
- │ c ┆ 1 │
343
- │ c ┆ 2 │
344
- │ a ┆ 3 │
345
- │ c ┆ 4 │
346
- │ a ┆ 5 │
347
- │ b ┆ 6 │
348
- └─────────┴─────┘
349
- >>> df.group_by("letters").head(2).sort("letters")
350
- shape: (5, 2)
351
- ┌─────────┬─────┐
352
- │ letters ┆ nrs │
353
- │ --- ┆ --- │
354
- │ str ┆ i64 │
355
- ╞═════════╪═════╡
356
- │ a ┆ 3 │
357
- │ a ┆ 5 │
358
- │ b ┆ 6 │
359
- │ c ┆ 1 │
360
- │ c ┆ 2 │
361
- └─────────┴─────┘
362
- """
363
- from polars.lazyframe.opt_flags import QueryOptFlags
364
-
365
- return (
366
- self.df.lazy()
367
- .group_by(*self.by, **self.named_by, maintain_order=self.maintain_order)
368
- .head(n)
369
- .collect(optimizations=QueryOptFlags._eager())
370
- )
371
-
372
- def tail(self, n: int = 5) -> DataFrame:
373
- """
374
- Get the last `n` rows of each group.
375
-
376
- Parameters
377
- ----------
378
- n
379
- Number of rows to return.
380
-
381
- Examples
382
- --------
383
- >>> df = pl.DataFrame(
384
- ... {
385
- ... "letters": ["c", "c", "a", "c", "a", "b"],
386
- ... "nrs": [1, 2, 3, 4, 5, 6],
387
- ... }
388
- ... )
389
- >>> df
390
- shape: (6, 2)
391
- ┌─────────┬─────┐
392
- │ letters ┆ nrs │
393
- │ --- ┆ --- │
394
- │ str ┆ i64 │
395
- ╞═════════╪═════╡
396
- │ c ┆ 1 │
397
- │ c ┆ 2 │
398
- │ a ┆ 3 │
399
- │ c ┆ 4 │
400
- │ a ┆ 5 │
401
- │ b ┆ 6 │
402
- └─────────┴─────┘
403
- >>> df.group_by("letters").tail(2).sort("letters")
404
- shape: (5, 2)
405
- ┌─────────┬─────┐
406
- │ letters ┆ nrs │
407
- │ --- ┆ --- │
408
- │ str ┆ i64 │
409
- ╞═════════╪═════╡
410
- │ a ┆ 3 │
411
- │ a ┆ 5 │
412
- │ b ┆ 6 │
413
- │ c ┆ 2 │
414
- │ c ┆ 4 │
415
- └─────────┴─────┘
416
- """
417
- from polars.lazyframe.opt_flags import QueryOptFlags
418
-
419
- return (
420
- self.df.lazy()
421
- .group_by(*self.by, **self.named_by, maintain_order=self.maintain_order)
422
- .tail(n)
423
- .collect(optimizations=QueryOptFlags.none())
424
- )
425
-
426
- def all(self) -> DataFrame:
427
- """
428
- Aggregate the groups into Series.
429
-
430
- Examples
431
- --------
432
- >>> df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]})
433
- >>> df.group_by("a", maintain_order=True).all()
434
- shape: (2, 2)
435
- ┌─────┬───────────┐
436
- │ a ┆ b │
437
- │ --- ┆ --- │
438
- │ str ┆ list[i64] │
439
- ╞═════╪═══════════╡
440
- │ one ┆ [1, 3] │
441
- │ two ┆ [2, 4] │
442
- └─────┴───────────┘
443
- """
444
- return self.agg(F.all())
445
-
446
- def len(self, name: str | None = None) -> DataFrame:
447
- """
448
- Return the number of rows in each group.
449
-
450
- Parameters
451
- ----------
452
- name
453
- Assign a name to the resulting column; if unset, defaults to "len".
454
-
455
- Examples
456
- --------
457
- >>> df = pl.DataFrame({"a": ["Apple", "Apple", "Orange"], "b": [1, None, 2]})
458
- >>> df.group_by("a").len() # doctest: +IGNORE_RESULT
459
- shape: (2, 2)
460
- ┌────────┬─────┐
461
- │ a ┆ len │
462
- │ --- ┆ --- │
463
- │ str ┆ u32 │
464
- ╞════════╪═════╡
465
- │ Apple ┆ 2 │
466
- │ Orange ┆ 1 │
467
- └────────┴─────┘
468
- >>> df.group_by("a").len(name="n") # doctest: +IGNORE_RESULT
469
- shape: (2, 2)
470
- ┌────────┬─────┐
471
- │ a ┆ n │
472
- │ --- ┆ --- │
473
- │ str ┆ u32 │
474
- ╞════════╪═════╡
475
- │ Apple ┆ 2 │
476
- │ Orange ┆ 1 │
477
- └────────┴─────┘
478
- """
479
- len_expr = F.len()
480
- if name is not None:
481
- len_expr = len_expr.alias(name)
482
- return self.agg(len_expr)
483
-
484
- @deprecated("`GroupBy.count` was renamed; use `GroupBy.len` instead")
485
- def count(self) -> DataFrame:
486
- """
487
- Return the number of rows in each group.
488
-
489
- .. deprecated:: 0.20.5
490
- This method has been renamed to :func:`GroupBy.len`.
491
-
492
- Rows containing null values count towards the total.
493
-
494
- Examples
495
- --------
496
- >>> df = pl.DataFrame(
497
- ... {
498
- ... "a": ["Apple", "Apple", "Orange"],
499
- ... "b": [1, None, 2],
500
- ... }
501
- ... )
502
- >>> df.group_by("a").count() # doctest: +SKIP
503
- shape: (2, 2)
504
- ┌────────┬───────┐
505
- │ a ┆ count │
506
- │ --- ┆ --- │
507
- │ str ┆ u32 │
508
- ╞════════╪═══════╡
509
- │ Apple ┆ 2 │
510
- │ Orange ┆ 1 │
511
- └────────┴───────┘
512
- """
513
- return self.agg(F.len().alias("count"))
514
-
515
- def first(self) -> DataFrame:
516
- """
517
- Aggregate the first values in the group.
518
-
519
- Examples
520
- --------
521
- >>> df = pl.DataFrame(
522
- ... {
523
- ... "a": [1, 2, 2, 3, 4, 5],
524
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
525
- ... "c": [True, True, True, False, False, True],
526
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
527
- ... }
528
- ... )
529
- >>> df.group_by("d", maintain_order=True).first()
530
- shape: (3, 4)
531
- ┌────────┬─────┬──────┬───────┐
532
- │ d ┆ a ┆ b ┆ c │
533
- │ --- ┆ --- ┆ --- ┆ --- │
534
- │ str ┆ i64 ┆ f64 ┆ bool │
535
- ╞════════╪═════╪══════╪═══════╡
536
- │ Apple ┆ 1 ┆ 0.5 ┆ true │
537
- │ Orange ┆ 2 ┆ 0.5 ┆ true │
538
- │ Banana ┆ 4 ┆ 13.0 ┆ false │
539
- └────────┴─────┴──────┴───────┘
540
- """
541
- return self.agg(F.all().first())
542
-
543
- def last(self) -> DataFrame:
544
- """
545
- Aggregate the last values in the group.
546
-
547
- Examples
548
- --------
549
- >>> df = pl.DataFrame(
550
- ... {
551
- ... "a": [1, 2, 2, 3, 4, 5],
552
- ... "b": [0.5, 0.5, 4, 10, 14, 13],
553
- ... "c": [True, True, True, False, False, True],
554
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
555
- ... }
556
- ... )
557
- >>> df.group_by("d", maintain_order=True).last()
558
- shape: (3, 4)
559
- ┌────────┬─────┬──────┬───────┐
560
- │ d ┆ a ┆ b ┆ c │
561
- │ --- ┆ --- ┆ --- ┆ --- │
562
- │ str ┆ i64 ┆ f64 ┆ bool │
563
- ╞════════╪═════╪══════╪═══════╡
564
- │ Apple ┆ 3 ┆ 10.0 ┆ false │
565
- │ Orange ┆ 2 ┆ 0.5 ┆ true │
566
- │ Banana ┆ 5 ┆ 13.0 ┆ true │
567
- └────────┴─────┴──────┴───────┘
568
- """
569
- return self.agg(F.all().last())
570
-
571
- def max(self) -> DataFrame:
572
- """
573
- Reduce the groups to the maximal value.
574
-
575
- Examples
576
- --------
577
- >>> df = pl.DataFrame(
578
- ... {
579
- ... "a": [1, 2, 2, 3, 4, 5],
580
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
581
- ... "c": [True, True, True, False, False, True],
582
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
583
- ... }
584
- ... )
585
- >>> df.group_by("d", maintain_order=True).max()
586
- shape: (3, 4)
587
- ┌────────┬─────┬──────┬──────┐
588
- │ d ┆ a ┆ b ┆ c │
589
- │ --- ┆ --- ┆ --- ┆ --- │
590
- │ str ┆ i64 ┆ f64 ┆ bool │
591
- ╞════════╪═════╪══════╪══════╡
592
- │ Apple ┆ 3 ┆ 10.0 ┆ true │
593
- │ Orange ┆ 2 ┆ 0.5 ┆ true │
594
- │ Banana ┆ 5 ┆ 14.0 ┆ true │
595
- └────────┴─────┴──────┴──────┘
596
- """
597
- return self.agg(F.all().max())
598
-
599
- def mean(self) -> DataFrame:
600
- """
601
- Reduce the groups to the mean values.
602
-
603
- Examples
604
- --------
605
- >>> df = pl.DataFrame(
606
- ... {
607
- ... "a": [1, 2, 2, 3, 4, 5],
608
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
609
- ... "c": [True, True, True, False, False, True],
610
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
611
- ... }
612
- ... )
613
- >>> df.group_by("d", maintain_order=True).mean()
614
- shape: (3, 4)
615
- ┌────────┬─────┬──────────┬──────────┐
616
- │ d ┆ a ┆ b ┆ c │
617
- │ --- ┆ --- ┆ --- ┆ --- │
618
- │ str ┆ f64 ┆ f64 ┆ f64 │
619
- ╞════════╪═════╪══════════╪══════════╡
620
- │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
621
- │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
622
- │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
623
- └────────┴─────┴──────────┴──────────┘
624
- """
625
- return self.agg(F.all().mean())
626
-
627
- def median(self) -> DataFrame:
628
- """
629
- Return the median per group.
630
-
631
- Examples
632
- --------
633
- >>> df = pl.DataFrame(
634
- ... {
635
- ... "a": [1, 2, 2, 3, 4, 5],
636
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
637
- ... "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
638
- ... }
639
- ... )
640
- >>> df.group_by("d", maintain_order=True).median()
641
- shape: (2, 3)
642
- ┌────────┬─────┬──────┐
643
- │ d ┆ a ┆ b │
644
- │ --- ┆ --- ┆ --- │
645
- │ str ┆ f64 ┆ f64 │
646
- ╞════════╪═════╪══════╡
647
- │ Apple ┆ 2.0 ┆ 4.0 │
648
- │ Banana ┆ 4.0 ┆ 13.0 │
649
- └────────┴─────┴──────┘
650
- """
651
- return self.agg(F.all().median())
652
-
653
- def min(self) -> DataFrame:
654
- """
655
- Reduce the groups to the minimal value.
656
-
657
- Examples
658
- --------
659
- >>> df = pl.DataFrame(
660
- ... {
661
- ... "a": [1, 2, 2, 3, 4, 5],
662
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
663
- ... "c": [True, True, True, False, False, True],
664
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
665
- ... }
666
- ... )
667
- >>> df.group_by("d", maintain_order=True).min()
668
- shape: (3, 4)
669
- ┌────────┬─────┬──────┬───────┐
670
- │ d ┆ a ┆ b ┆ c │
671
- │ --- ┆ --- ┆ --- ┆ --- │
672
- │ str ┆ i64 ┆ f64 ┆ bool │
673
- ╞════════╪═════╪══════╪═══════╡
674
- │ Apple ┆ 1 ┆ 0.5 ┆ false │
675
- │ Orange ┆ 2 ┆ 0.5 ┆ true │
676
- │ Banana ┆ 4 ┆ 13.0 ┆ false │
677
- └────────┴─────┴──────┴───────┘
678
- """
679
- return self.agg(F.all().min())
680
-
681
- def n_unique(self) -> DataFrame:
682
- """
683
- Count the unique values per group.
684
-
685
- Examples
686
- --------
687
- >>> df = pl.DataFrame(
688
- ... {
689
- ... "a": [1, 2, 1, 3, 4, 5],
690
- ... "b": [0.5, 0.5, 0.5, 10, 13, 14],
691
- ... "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
692
- ... }
693
- ... )
694
- >>> df.group_by("d", maintain_order=True).n_unique()
695
- shape: (2, 3)
696
- ┌────────┬─────┬─────┐
697
- │ d ┆ a ┆ b │
698
- │ --- ┆ --- ┆ --- │
699
- │ str ┆ u32 ┆ u32 │
700
- ╞════════╪═════╪═════╡
701
- │ Apple ┆ 2 ┆ 2 │
702
- │ Banana ┆ 3 ┆ 3 │
703
- └────────┴─────┴─────┘
704
- """
705
- return self.agg(F.all().n_unique())
706
-
707
- def quantile(
708
- self, quantile: float, interpolation: QuantileMethod = "nearest"
709
- ) -> DataFrame:
710
- """
711
- Compute the quantile per group.
712
-
713
- Parameters
714
- ----------
715
- quantile
716
- Quantile between 0.0 and 1.0.
717
- interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
718
- Interpolation method.
719
-
720
- Examples
721
- --------
722
- >>> df = pl.DataFrame(
723
- ... {
724
- ... "a": [1, 2, 2, 3, 4, 5],
725
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
726
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
727
- ... }
728
- ... )
729
- >>> df.group_by("d", maintain_order=True).quantile(1)
730
- shape: (3, 3)
731
- ┌────────┬─────┬──────┐
732
- │ d ┆ a ┆ b │
733
- │ --- ┆ --- ┆ --- │
734
- │ str ┆ f64 ┆ f64 │
735
- ╞════════╪═════╪══════╡
736
- │ Apple ┆ 3.0 ┆ 10.0 │
737
- │ Orange ┆ 2.0 ┆ 0.5 │
738
- │ Banana ┆ 5.0 ┆ 14.0 │
739
- └────────┴─────┴──────┘
740
- """ # noqa: W505
741
- return self.agg(F.all().quantile(quantile, interpolation=interpolation))
742
-
743
- def sum(self) -> DataFrame:
744
- """
745
- Reduce the groups to the sum.
746
-
747
- Examples
748
- --------
749
- >>> df = pl.DataFrame(
750
- ... {
751
- ... "a": [1, 2, 2, 3, 4, 5],
752
- ... "b": [0.5, 0.5, 4, 10, 13, 14],
753
- ... "c": [True, True, True, False, False, True],
754
- ... "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
755
- ... }
756
- ... )
757
- >>> df.group_by("d", maintain_order=True).sum()
758
- shape: (3, 4)
759
- ┌────────┬─────┬──────┬─────┐
760
- │ d ┆ a ┆ b ┆ c │
761
- │ --- ┆ --- ┆ --- ┆ --- │
762
- │ str ┆ i64 ┆ f64 ┆ u32 │
763
- ╞════════╪═════╪══════╪═════╡
764
- │ Apple ┆ 6 ┆ 14.5 ┆ 2 │
765
- │ Orange ┆ 2 ┆ 0.5 ┆ 1 │
766
- │ Banana ┆ 9 ┆ 27.0 ┆ 1 │
767
- └────────┴─────┴──────┴─────┘
768
- """
769
- return self.agg(F.all().sum())
770
-
771
-
772
class RollingGroupBy:
    """
    A rolling grouper.

    This has an `.agg` method which will allow you to run all polars expressions in a
    group by context.
    """

    def __init__(
        self,
        df: DataFrame,
        index_column: IntoExpr,
        *,
        period: str | timedelta,
        offset: str | timedelta | None,
        closed: ClosedInterval,
        group_by: IntoExpr | Iterable[IntoExpr] | None,
    ) -> None:
        self.df = df
        self.time_column = index_column
        # Normalize str/timedelta inputs to polars duration strings up front so
        # every downstream call receives a canonical representation.
        self.period = parse_as_duration_string(period)
        self.offset = parse_as_duration_string(offset)
        self.closed = closed
        self.group_by = group_by

    def _rolling_ctx(self):
        # Build the lazy rolling group-by context shared by __iter__, agg and
        # map_groups, so the window parameters are spelled out only once.
        return self.df.lazy().rolling(
            index_column=self.time_column,
            period=self.period,
            offset=self.offset,
            closed=self.closed,
            group_by=self.group_by,
        )

    def __iter__(self) -> Self:
        from polars.lazyframe.opt_flags import QueryOptFlags

        indices_col = "__POLARS_GB_GROUP_INDICES"
        # Materialize, per window, the row indices belonging to that window.
        groups_df = (
            self._rolling_ctx()
            .agg(F.first().agg_groups().alias(indices_col))
            .collect(optimizations=QueryOptFlags.none())
        )

        # Group keys as row tuples; indices as a Series of index lists.
        self._group_names = groups_df.select(F.all().exclude(indices_col)).iter_rows()
        self._group_indices = groups_df.select(indices_col).to_series()
        self._current_index = 0

        return self

    def __next__(self) -> tuple[tuple[object, ...], DataFrame]:
        if self._current_index >= len(self._group_indices):
            raise StopIteration

        name = next(self._group_names)
        data = self.df[self._group_indices[self._current_index], :]
        self._current_index += 1

        return name, data

    def agg(
        self,
        *aggs: IntoExpr | Iterable[IntoExpr],
        **named_aggs: IntoExpr,
    ) -> DataFrame:
        """
        Compute aggregations for each group of a group by operation.

        Parameters
        ----------
        *aggs
            Aggregations to compute for each group of the group by operation,
            specified as positional arguments.
            Accepts expression input. Strings are parsed as column names.
        **named_aggs
            Additional aggregations, specified as keyword arguments.
            The resulting columns will be renamed to the keyword used.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        lazy_result = self._rolling_ctx().agg(*aggs, **named_aggs)
        return lazy_result.collect(optimizations=QueryOptFlags.none())

    def map_groups(
        self,
        function: Callable[[DataFrame], DataFrame],
        schema: SchemaDict | None,
    ) -> DataFrame:
        """
        Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

        Using this is considered an anti-pattern as it will be very slow because:

        - it forces the engine to materialize the whole `DataFrames` for the groups.
        - it is not parallelized.
        - it blocks optimizations as the passed python function is opaque to the
          optimizer.

        The idiomatic way to apply custom functions over multiple columns is using:

        `pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

        Parameters
        ----------
        function
            Function to apply over each group of the `LazyFrame`; it receives
            a DataFrame and should return a DataFrame.
        schema
            Schema of the output function. This has to be known statically. If the
            given schema is incorrect, this is a bug in the caller's query and may
            lead to errors. If set to None, polars assumes the schema is unchanged.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        lazy_result = self._rolling_ctx().map_groups(function, schema)
        return lazy_result.collect(optimizations=QueryOptFlags.none())
909
-
910
-
911
class DynamicGroupBy:
    """
    A dynamic grouper.

    This has an `.agg` method which allows you to run all polars expressions in a
    group by context.
    """

    def __init__(
        self,
        df: DataFrame,
        index_column: IntoExpr,
        *,
        every: str | timedelta,
        period: str | timedelta | None,
        offset: str | timedelta | None,
        include_boundaries: bool,
        closed: ClosedInterval,
        label: Label,
        group_by: IntoExpr | Iterable[IntoExpr] | None,
        start_by: StartBy,
    ) -> None:
        # Normalize str/timedelta inputs to polars duration strings up front.
        every = parse_as_duration_string(every)
        period = parse_as_duration_string(period)
        offset = parse_as_duration_string(offset)

        self.df = df
        self.time_column = index_column
        self.every = every
        self.period = period
        self.offset = offset
        self.label = label
        self.include_boundaries = include_boundaries
        self.closed = closed
        self.group_by = group_by
        self.start_by = start_by

    def __iter__(self) -> Self:
        from polars.lazyframe.opt_flags import QueryOptFlags

        temp_col = "__POLARS_GB_GROUP_INDICES"
        # Materialize, per dynamic window, the row indices belonging to it.
        groups_df = (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .agg(F.first().agg_groups().alias(temp_col))
            .collect(optimizations=QueryOptFlags.none())
        )

        # Group keys as row tuples; indices as a Series of index lists.
        self._group_names = groups_df.select(F.all().exclude(temp_col)).iter_rows()
        self._group_indices = groups_df.select(temp_col).to_series()
        self._current_index = 0

        return self

    def __next__(self) -> tuple[tuple[object, ...], DataFrame]:
        if self._current_index >= len(self._group_indices):
            raise StopIteration

        group_name = next(self._group_names)
        group_data = self.df[self._group_indices[self._current_index], :]
        self._current_index += 1

        return group_name, group_data

    def agg(
        self,
        *aggs: IntoExpr | Iterable[IntoExpr],
        **named_aggs: IntoExpr,
    ) -> DataFrame:
        """
        Compute aggregations for each group of a group by operation.

        Parameters
        ----------
        *aggs
            Aggregations to compute for each group of the group by operation,
            specified as positional arguments.
            Accepts expression input. Strings are parsed as column names.
        **named_aggs
            Additional aggregations, specified as keyword arguments.
            The resulting columns will be renamed to the keyword used.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        return (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .agg(*aggs, **named_aggs)
            .collect(optimizations=QueryOptFlags.none())
        )

    def map_groups(
        self,
        function: Callable[[DataFrame], DataFrame],
        schema: SchemaDict | None,
    ) -> DataFrame:
        """
        Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

        Using this is considered an anti-pattern as it will be very slow because:

        - it forces the engine to materialize the whole `DataFrames` for the groups.
        - it is not parallelized.
        - it blocks optimizations as the passed python function is opaque to the
          optimizer.

        The idiomatic way to apply custom functions over multiple columns is using:

        `pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

        Parameters
        ----------
        function
            Function to apply over each group of the `LazyFrame`; it receives
            a DataFrame and should return a DataFrame.
        schema
            Schema of the output function. This has to be known statically. If the
            given schema is incorrect, this is a bug in the caller's query and may
            lead to errors. If set to None, polars assumes the schema is unchanged.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        return (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                # Fix: pass `label` here too, consistent with __iter__ and agg;
                # previously a non-default label was silently ignored by
                # map_groups and the engine's default label was used instead.
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .map_groups(function, schema)
            .collect(optimizations=QueryOptFlags.none())
        )