polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203):
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,1067 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Callable
4
+
5
+ from polars import functions as F
6
+ from polars._utils.convert import parse_as_duration_string
7
+ from polars._utils.deprecation import deprecated
8
+
9
+ if TYPE_CHECKING:
10
+ import sys
11
+ from collections.abc import Iterable
12
+ from datetime import timedelta
13
+
14
+ from polars import DataFrame
15
+ from polars._typing import (
16
+ ClosedInterval,
17
+ IntoExpr,
18
+ Label,
19
+ QuantileMethod,
20
+ SchemaDict,
21
+ StartBy,
22
+ )
23
+
24
+ if sys.version_info >= (3, 11):
25
+ from typing import Self
26
+ else:
27
+ from typing_extensions import Self
28
+
29
+ if sys.version_info >= (3, 13):
30
+ from warnings import deprecated
31
+ else:
32
+ from typing_extensions import deprecated # noqa: TC004
33
+
34
+
35
+ class GroupBy:
36
+ """Starts a new GroupBy operation."""
37
+
38
+ def __init__(
39
+ self,
40
+ df: DataFrame,
41
+ *by: IntoExpr | Iterable[IntoExpr],
42
+ maintain_order: bool,
43
+ **named_by: IntoExpr,
44
+ ) -> None:
45
+ """
46
+ Utility class for performing a group by operation over the given DataFrame.
47
+
48
+ Generated by calling `df.group_by(...)`.
49
+
50
+ Parameters
51
+ ----------
52
+ df
53
+ DataFrame to perform the group by operation over.
54
+ *by
55
+ Column or columns to group by. Accepts expression input. Strings are parsed
56
+ as column names.
57
+ maintain_order
58
+ Ensure that the order of the groups is consistent with the input data.
59
+ This is slower than a default group by.
60
+ **named_by
61
+ Additional column(s) to group by, specified as keyword arguments.
62
+ The columns will be named as the keyword used.
63
+ """
64
+ self.df = df
65
+ self.by = by
66
+ self.named_by = named_by
67
+ self.maintain_order = maintain_order
68
+
69
def __iter__(self) -> Self:
    """
    Prepare this object for iteration over its groups.

    Every iteration step yields a `(name, data)` tuple. The name is a tuple
    holding the distinct key values that identify the group; the data is the
    sub-DataFrame containing the rows of that group.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": ["a", "a", "b"], "bar": [1, 2, 3]})
    >>> for name, data in df.group_by("foo"):  # doctest: +SKIP
    ...     print(name)
    ...     print(data)
    ('a',)
    shape: (2, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ a   ┆ 1   │
    │ a   ┆ 2   │
    └─────┴─────┘
    ('b',)
    shape: (1, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ b   ┆ 3   │
    └─────┴─────┘
    """
    from polars.lazyframe.opt_flags import QueryOptFlags

    # Gathering the rows of each group may trigger a rechunk, so rechunk once
    # up front instead of once per group.
    self.df = self.df.rechunk()

    indices_col = "__POLARS_GB_GROUP_INDICES"
    lazy_groups = self.df.lazy().group_by(
        *self.by, **self.named_by, maintain_order=self.maintain_order
    )
    # One row per group: the key columns plus a column of row indices.
    per_group = lazy_groups.agg(F.first().agg_groups().alias(indices_col)).collect(
        optimizations=QueryOptFlags.none()
    )

    key_columns = per_group.select(F.all().exclude(indices_col))
    self._group_names = key_columns.iter_rows()
    self._group_indices = per_group.select(indices_col).to_series()
    self._current_index = 0

    return self
119
+
120
+ def __next__(self) -> tuple[tuple[Any, ...], DataFrame]:
121
+ if self._current_index >= len(self._group_indices):
122
+ raise StopIteration
123
+
124
+ group_name = next(self._group_names)
125
+ group_data = self.df[self._group_indices[self._current_index], :]
126
+ self._current_index += 1
127
+
128
+ return group_name, group_data
129
+
130
def agg(
    self,
    *aggs: IntoExpr | Iterable[IntoExpr],
    **named_aggs: IntoExpr,
) -> DataFrame:
    """
    Compute aggregations for each group of a group by operation.

    The work is delegated to the lazy engine: the frame is made lazy, grouped
    with the stored keys, aggregated, and collected again.

    Parameters
    ----------
    *aggs
        Aggregations to compute for each group of the group by operation,
        specified as positional arguments.
        Accepts expression input. Strings are parsed as column names.
    **named_aggs
        Additional aggregations, specified as keyword arguments.
        The resulting columns will be renamed to the keyword used.

    Examples
    --------
    Compute the aggregation of the columns for each group.

    >>> df = pl.DataFrame(
    ...     {
    ...         "a": ["a", "b", "a", "b", "c"],
    ...         "b": [1, 2, 1, 3, 3],
    ...         "c": [5, 4, 3, 2, 1],
    ...     }
    ... )
    >>> df.group_by("a").agg(pl.col("b"), pl.col("c"))  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────────┬───────────┐
    │ a   ┆ b         ┆ c         │
    │ --- ┆ ---       ┆ ---       │
    │ str ┆ list[i64] ┆ list[i64] │
    ╞═════╪═══════════╪═══════════╡
    │ a   ┆ [1, 1]    ┆ [5, 3]    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ b   ┆ [2, 3]    ┆ [4, 2]    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ c   ┆ [3]       ┆ [1]       │
    └─────┴───────────┴───────────┘

    Compute the sum of a column for each group.

    >>> df.group_by("a").agg(pl.col("b").sum())  # doctest: +IGNORE_RESULT
    shape: (3, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ a   ┆ 2   │
    │ b   ┆ 5   │
    │ c   ┆ 3   │
    └─────┴─────┘

    Compute multiple aggregates at once by passing a list of expressions.

    >>> df.group_by("a").agg([pl.sum("b"), pl.mean("c")])  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ f64 │
    ╞═════╪═════╪═════╡
    │ c   ┆ 3   ┆ 1.0 │
    │ a   ┆ 2   ┆ 4.0 │
    │ b   ┆ 5   ┆ 3.0 │
    └─────┴─────┴─────┘

    Or use positional arguments to compute multiple aggregations in the same way.

    >>> df.group_by("a").agg(
    ...     pl.sum("b").name.suffix("_sum"),
    ...     (pl.col("c") ** 2).mean().name.suffix("_mean_squared"),
    ... )  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────┬────────────────┐
    │ a   ┆ b_sum ┆ c_mean_squared │
    │ --- ┆ ---   ┆ ---            │
    │ str ┆ i64   ┆ f64            │
    ╞═════╪═══════╪════════════════╡
    │ a   ┆ 2     ┆ 17.0           │
    │ c   ┆ 3     ┆ 1.0            │
    │ b   ┆ 5     ┆ 10.0           │
    └─────┴───────┴────────────────┘

    Use keyword arguments to easily name your expression inputs.

    >>> df.group_by("a").agg(
    ...     b_sum=pl.sum("b"),
    ...     c_mean_squared=(pl.col("c") ** 2).mean(),
    ... )  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────┬────────────────┐
    │ a   ┆ b_sum ┆ c_mean_squared │
    │ --- ┆ ---   ┆ ---            │
    │ str ┆ i64   ┆ f64            │
    ╞═════╪═══════╪════════════════╡
    │ a   ┆ 2     ┆ 17.0           │
    │ c   ┆ 3     ┆ 1.0            │
    │ b   ┆ 5     ┆ 10.0           │
    └─────┴───────┴────────────────┘
    """
    from polars.lazyframe.opt_flags import QueryOptFlags

    lazy_groups = self.df.lazy().group_by(
        *self.by, **self.named_by, maintain_order=self.maintain_order
    )
    aggregated = lazy_groups.agg(*aggs, **named_aggs)
    return aggregated.collect(optimizations=QueryOptFlags.none())
243
+
244
+ def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
245
+ """
246
+ Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
247
+
248
+ .. warning::
249
+ This method is much slower than the native expressions API.
250
+ Only use it if you cannot implement your logic otherwise.
251
+
252
+ Implementing logic using a Python function is almost always *significantly*
253
+ slower and more memory intensive than implementing the same logic using
254
+ the native expression API because:
255
+
256
+ - The native expression engine runs in Rust; UDFs run in Python.
257
+ - Use of Python UDFs forces the DataFrame to be materialized in memory.
258
+ - Polars-native expressions can be parallelised (UDFs cannot).
259
+ - Polars-native expressions can be logically optimised (UDFs cannot).
260
+
261
+ Wherever possible you should strongly prefer the native expression API
262
+ to achieve the best performance.
263
+
264
+ Parameters
265
+ ----------
266
+ function
267
+ Custom function that receives a DataFrame and returns a DataFrame.
268
+
269
+ Returns
270
+ -------
271
+ DataFrame
272
+
273
+ Examples
274
+ --------
275
+ For each color group sample two rows:
276
+
277
+ >>> df = pl.DataFrame(
278
+ ... {
279
+ ... "id": [0, 1, 2, 3, 4],
280
+ ... "color": ["red", "green", "green", "red", "red"],
281
+ ... "shape": ["square", "triangle", "square", "triangle", "square"],
282
+ ... }
283
+ ... )
284
+ >>> df.group_by("color").map_groups(
285
+ ... lambda group_df: group_df.sample(2)
286
+ ... ) # doctest: +IGNORE_RESULT
287
+ shape: (4, 3)
288
+ ┌─────┬───────┬──────────┐
289
+ │ id ┆ color ┆ shape │
290
+ │ --- ┆ --- ┆ --- │
291
+ │ i64 ┆ str ┆ str │
292
+ ╞═════╪═══════╪══════════╡
293
+ │ 1 ┆ green ┆ triangle │
294
+ │ 2 ┆ green ┆ square │
295
+ │ 4 ┆ red ┆ square │
296
+ │ 3 ┆ red ┆ triangle │
297
+ └─────┴───────┴──────────┘
298
+
299
+ It is better to implement this with an expression:
300
+
301
+ >>> df.filter(
302
+ ... pl.int_range(pl.len()).shuffle().over("color") < 2
303
+ ... ) # doctest: +IGNORE_RESULT
304
+ """
305
+ if self.named_by:
306
+ msg = "cannot call `map_groups` when grouping by named expressions"
307
+ raise TypeError(msg)
308
+ if not all(isinstance(c, str) for c in self.by):
309
+ msg = "cannot call `map_groups` when grouping by an expression"
310
+ raise TypeError(msg)
311
+
312
+ by_strs: list[str] = self.by # type: ignore[assignment]
313
+
314
+ return self.df.__class__._from_pydf(
315
+ self.df._df.group_by_map_groups(by_strs, function, self.maintain_order)
316
+ )
317
+
318
def head(self, n: int = 5) -> DataFrame:
    """
    Return the leading `n` rows of every group.

    Parameters
    ----------
    n
        Number of rows to return.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "letters": ["c", "c", "a", "c", "a", "b"],
    ...         "nrs": [1, 2, 3, 4, 5, 6],
    ...     }
    ... )
    >>> df
    shape: (6, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    │ a       ┆ 3   │
    │ c       ┆ 4   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    └─────────┴─────┘
    >>> df.group_by("letters").head(2).sort("letters")
    shape: (5, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ a       ┆ 3   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    └─────────┴─────┘
    """
    from polars.lazyframe.opt_flags import QueryOptFlags

    lazy_groups = self.df.lazy().group_by(
        *self.by, **self.named_by, maintain_order=self.maintain_order
    )
    leading_rows = lazy_groups.head(n)
    # NOTE(review): this collects with `QueryOptFlags._eager()` whereas `tail`
    # collects with `QueryOptFlags.none()` - confirm the difference is intended.
    return leading_rows.collect(optimizations=QueryOptFlags._eager())
371
+
372
def tail(self, n: int = 5) -> DataFrame:
    """
    Return the trailing `n` rows of every group.

    Parameters
    ----------
    n
        Number of rows to return.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "letters": ["c", "c", "a", "c", "a", "b"],
    ...         "nrs": [1, 2, 3, 4, 5, 6],
    ...     }
    ... )
    >>> df
    shape: (6, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    │ a       ┆ 3   │
    │ c       ┆ 4   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    └─────────┴─────┘
    >>> df.group_by("letters").tail(2).sort("letters")
    shape: (5, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ a       ┆ 3   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    │ c       ┆ 2   │
    │ c       ┆ 4   │
    └─────────┴─────┘
    """
    from polars.lazyframe.opt_flags import QueryOptFlags

    lazy_groups = self.df.lazy().group_by(
        *self.by, **self.named_by, maintain_order=self.maintain_order
    )
    trailing_rows = lazy_groups.tail(n)
    return trailing_rows.collect(optimizations=QueryOptFlags.none())
425
+
426
def all(self) -> DataFrame:
    """
    Collect the values of each group into Series (one list per column).

    Examples
    --------
    >>> df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]})
    >>> df.group_by("a", maintain_order=True).all()
    shape: (2, 2)
    ┌─────┬───────────┐
    │ a   ┆ b         │
    │ --- ┆ ---       │
    │ str ┆ list[i64] │
    ╞═════╪═══════════╡
    │ one ┆ [1, 3]    │
    │ two ┆ [2, 4]    │
    └─────┴───────────┘
    """
    every_column = F.all()
    return self.agg(every_column)
445
+
446
def len(self, name: str | None = None) -> DataFrame:
    """
    Count the rows of each group.

    Parameters
    ----------
    name
        Assign a name to the resulting column; if unset, defaults to "len".

    Examples
    --------
    >>> df = pl.DataFrame({"a": ["Apple", "Apple", "Orange"], "b": [1, None, 2]})
    >>> df.group_by("a").len()  # doctest: +IGNORE_RESULT
    shape: (2, 2)
    ┌────────┬─────┐
    │ a      ┆ len │
    │ ---    ┆ --- │
    │ str    ┆ u32 │
    ╞════════╪═════╡
    │ Apple  ┆ 2   │
    │ Orange ┆ 1   │
    └────────┴─────┘
    >>> df.group_by("a").len(name="n")  # doctest: +IGNORE_RESULT
    shape: (2, 2)
    ┌────────┬─────┐
    │ a      ┆ n   │
    │ ---    ┆ --- │
    │ str    ┆ u32 │
    ╞════════╪═════╡
    │ Apple  ┆ 2   │
    │ Orange ┆ 1   │
    └────────┴─────┘
    """
    # Only rename the count column when the caller asked for a specific name.
    row_count = F.len() if name is None else F.len().alias(name)
    return self.agg(row_count)
483
+
484
@deprecated("`GroupBy.count` was renamed; use `GroupBy.len` instead")
def count(self) -> DataFrame:
    """
    Count the rows of each group, naming the result column "count".

    .. deprecated:: 0.20.5
        This method has been renamed to :func:`GroupBy.len`.

    Rows containing null values count towards the total.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": ["Apple", "Apple", "Orange"],
    ...         "b": [1, None, 2],
    ...     }
    ... )
    >>> df.group_by("a").count()  # doctest: +SKIP
    shape: (2, 2)
    ┌────────┬───────┐
    │ a      ┆ count │
    │ ---    ┆ ---   │
    │ str    ┆ u32   │
    ╞════════╪═══════╡
    │ Apple  ┆ 2     │
    │ Orange ┆ 1     │
    └────────┴───────┘
    """
    row_count = F.len().alias("count")
    return self.agg(row_count)
514
+
515
def first(self) -> DataFrame:
    """
    Reduce every group to the first value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).first()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 1   ┆ 0.5  ┆ true  │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 4   ┆ 13.0 ┆ false │
    └────────┴─────┴──────┴───────┘
    """
    first_values = F.all().first()
    return self.agg(first_values)
542
+
543
def last(self) -> DataFrame:
    """
    Reduce every group to the last value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 14, 13],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).last()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 3   ┆ 10.0 ┆ false │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 5   ┆ 13.0 ┆ true  │
    └────────┴─────┴──────┴───────┘
    """
    last_values = F.all().last()
    return self.agg(last_values)
570
+
571
def max(self) -> DataFrame:
    """
    Reduce every group to the maximum value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).max()
    shape: (3, 4)
    ┌────────┬─────┬──────┬──────┐
    │ d      ┆ a   ┆ b    ┆ c    │
    │ ---    ┆ --- ┆ ---  ┆ ---  │
    │ str    ┆ i64 ┆ f64  ┆ bool │
    ╞════════╪═════╪══════╪══════╡
    │ Apple  ┆ 3   ┆ 10.0 ┆ true │
    │ Orange ┆ 2   ┆ 0.5  ┆ true │
    │ Banana ┆ 5   ┆ 14.0 ┆ true │
    └────────┴─────┴──────┴──────┘
    """
    maxima = F.all().max()
    return self.agg(maxima)
598
+
599
def mean(self) -> DataFrame:
    """
    Reduce every group to the mean value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).mean()
    shape: (3, 4)
    ┌────────┬─────┬──────────┬──────────┐
    │ d      ┆ a   ┆ b        ┆ c        │
    │ ---    ┆ --- ┆ ---      ┆ ---      │
    │ str    ┆ f64 ┆ f64      ┆ f64      │
    ╞════════╪═════╪══════════╪══════════╡
    │ Apple  ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
    │ Orange ┆ 2.0 ┆ 0.5      ┆ 1.0      │
    │ Banana ┆ 4.5 ┆ 13.5     ┆ 0.5      │
    └────────┴─────┴──────────┴──────────┘
    """
    means = F.all().mean()
    return self.agg(means)
626
+
627
def median(self) -> DataFrame:
    """
    Reduce every group to the median value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).median()
    shape: (2, 3)
    ┌────────┬─────┬──────┐
    │ d      ┆ a   ┆ b    │
    │ ---    ┆ --- ┆ ---  │
    │ str    ┆ f64 ┆ f64  │
    ╞════════╪═════╪══════╡
    │ Apple  ┆ 2.0 ┆ 4.0  │
    │ Banana ┆ 4.0 ┆ 13.0 │
    └────────┴─────┴──────┘
    """
    medians = F.all().median()
    return self.agg(medians)
652
+
653
def min(self) -> DataFrame:
    """
    Reduce every group to the minimum value of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).min()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 1   ┆ 0.5  ┆ false │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 4   ┆ 13.0 ┆ false │
    └────────┴─────┴──────┴───────┘
    """
    minima = F.all().min()
    return self.agg(minima)
680
+
681
def n_unique(self) -> DataFrame:
    """
    Count the distinct values of each column within every group.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 1, 3, 4, 5],
    ...         "b": [0.5, 0.5, 0.5, 10, 13, 14],
    ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).n_unique()
    shape: (2, 3)
    ┌────────┬─────┬─────┐
    │ d      ┆ a   ┆ b   │
    │ ---    ┆ --- ┆ --- │
    │ str    ┆ u32 ┆ u32 │
    ╞════════╪═════╪═════╡
    │ Apple  ┆ 2   ┆ 2   │
    │ Banana ┆ 3   ┆ 3   │
    └────────┴─────┴─────┘
    """
    distinct_counts = F.all().n_unique()
    return self.agg(distinct_counts)
706
+
707
def quantile(
    self, quantile: float, interpolation: QuantileMethod = "nearest"
) -> DataFrame:
    """
    Reduce every group to the requested quantile of each column.

    Parameters
    ----------
    quantile
        Quantile between 0.0 and 1.0.
    interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
        Interpolation method.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).quantile(1)
    shape: (3, 3)
    ┌────────┬─────┬──────┐
    │ d      ┆ a   ┆ b    │
    │ ---    ┆ --- ┆ ---  │
    │ str    ┆ f64 ┆ f64  │
    ╞════════╪═════╪══════╡
    │ Apple  ┆ 3.0 ┆ 10.0 │
    │ Orange ┆ 2.0 ┆ 0.5  │
    │ Banana ┆ 5.0 ┆ 14.0 │
    └────────┴─────┴──────┘
    """  # noqa: W505
    per_column = F.all().quantile(quantile, interpolation=interpolation)
    return self.agg(per_column)
742
+
743
def sum(self) -> DataFrame:
    """
    Reduce every group to the sum of each column.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... )
    >>> df.group_by("d", maintain_order=True).sum()
    shape: (3, 4)
    ┌────────┬─────┬──────┬─────┐
    │ d      ┆ a   ┆ b    ┆ c   │
    │ ---    ┆ --- ┆ ---  ┆ --- │
    │ str    ┆ i64 ┆ f64  ┆ u32 │
    ╞════════╪═════╪══════╪═════╡
    │ Apple  ┆ 6   ┆ 14.5 ┆ 2   │
    │ Orange ┆ 2   ┆ 0.5  ┆ 1   │
    │ Banana ┆ 9   ┆ 27.0 ┆ 1   │
    └────────┴─────┴──────┴─────┘
    """
    totals = F.all().sum()
    return self.agg(totals)
770
+
771
+
772
+ class RollingGroupBy:
773
+ """
774
+ A rolling grouper.
775
+
776
+ This has an `.agg` method which will allow you to run all polars expressions in a
777
+ group by context.
778
+ """
779
+
780
def __init__(
    self,
    df: DataFrame,
    index_column: IntoExpr,
    *,
    period: str | timedelta,
    offset: str | timedelta | None,
    closed: ClosedInterval,
    group_by: IntoExpr | Iterable[IntoExpr] | None,
) -> None:
    """
    Store the inputs of a rolling group by operation.

    `period` and `offset` are passed through `parse_as_duration_string`
    before being stored; the remaining arguments are kept as given and
    forwarded to `LazyFrame.rolling` when the groups are evaluated.
    """
    # Normalise both durations first so that a bad value fails before any
    # attribute is set.
    parsed_period = parse_as_duration_string(period)
    parsed_offset = parse_as_duration_string(offset)

    self.df = df
    self.time_column = index_column
    self.period = parsed_period
    self.offset = parsed_offset
    self.closed = closed
    self.group_by = group_by
799
+
800
def __iter__(self) -> Self:
    """
    Prepare this object for iteration over its rolling windows.

    Evaluates the rolling grouping once, keeping the key rows and the row
    indices of every window so that `__next__` can return `(name, data)`
    pairs.
    """
    from polars.lazyframe.opt_flags import QueryOptFlags

    indices_col = "__POLARS_GB_GROUP_INDICES"
    lazy_windows = self.df.lazy().rolling(
        index_column=self.time_column,
        period=self.period,
        offset=self.offset,
        closed=self.closed,
        group_by=self.group_by,
    )
    # One row per window: the key columns plus a column of row indices.
    per_window = lazy_windows.agg(F.first().agg_groups().alias(indices_col)).collect(
        optimizations=QueryOptFlags.none()
    )

    key_columns = per_window.select(F.all().exclude(indices_col))
    self._group_names = key_columns.iter_rows()
    self._group_indices = per_window.select(indices_col).to_series()
    self._current_index = 0

    return self
822
+
823
+ def __next__(self) -> tuple[tuple[object, ...], DataFrame]:
824
+ if self._current_index >= len(self._group_indices):
825
+ raise StopIteration
826
+
827
+ group_name = next(self._group_names)
828
+ group_data = self.df[self._group_indices[self._current_index], :]
829
+ self._current_index += 1
830
+
831
+ return group_name, group_data
832
+
833
+ def agg(
834
+ self,
835
+ *aggs: IntoExpr | Iterable[IntoExpr],
836
+ **named_aggs: IntoExpr,
837
+ ) -> DataFrame:
838
+ """
839
+ Compute aggregations for each group of a group by operation.
840
+
841
+ Parameters
842
+ ----------
843
+ *aggs
844
+ Aggregations to compute for each group of the group by operation,
845
+ specified as positional arguments.
846
+ Accepts expression input. Strings are parsed as column names.
847
+ **named_aggs
848
+ Additional aggregations, specified as keyword arguments.
849
+ The resulting columns will be renamed to the keyword used.
850
+ """
851
+ from polars.lazyframe.opt_flags import QueryOptFlags
852
+
853
+ return (
854
+ self.df.lazy()
855
+ .rolling(
856
+ index_column=self.time_column,
857
+ period=self.period,
858
+ offset=self.offset,
859
+ closed=self.closed,
860
+ group_by=self.group_by,
861
+ )
862
+ .agg(*aggs, **named_aggs)
863
+ .collect(optimizations=QueryOptFlags.none())
864
+ )
865
+
866
+ def map_groups(
867
+ self,
868
+ function: Callable[[DataFrame], DataFrame],
869
+ schema: SchemaDict | None,
870
+ ) -> DataFrame:
871
+ """
872
+ Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.
873
+
874
+ Using this is considered an anti-pattern as it will be very slow because:
875
+
876
+ - it forces the engine to materialize the whole `DataFrames` for the groups.
877
+ - it is not parallelized.
878
+ - it blocks optimizations as the passed python function is opaque to the
879
+ optimizer.
880
+
881
+ The idiomatic way to apply custom functions over multiple columns is using:
882
+
883
+ `pl.struct([my_columns]).map_elements(lambda struct_series: ..)`
884
+
885
+ Parameters
886
+ ----------
887
+ function
888
+ Function to apply over each group of the `LazyFrame`; it receives
889
+ a DataFrame and should return a DataFrame.
890
+ schema
891
+ Schema of the output function. This has to be known statically. If the
892
+ given schema is incorrect, this is a bug in the caller's query and may
893
+ lead to errors. If set to None, polars assumes the schema is unchanged.
894
+ """
895
+ from polars.lazyframe.opt_flags import QueryOptFlags
896
+
897
+ return (
898
+ self.df.lazy()
899
+ .rolling(
900
+ index_column=self.time_column,
901
+ period=self.period,
902
+ offset=self.offset,
903
+ closed=self.closed,
904
+ group_by=self.group_by,
905
+ )
906
+ .map_groups(function, schema)
907
+ .collect(optimizations=QueryOptFlags.none())
908
+ )
909
+
910
+
911
class DynamicGroupBy:
    """
    A dynamic grouper.

    This has an `.agg` method which allows you to run all polars expressions in a
    group by context. Iterating the object yields `(group_name, group_data)`
    pairs.
    """

    def __init__(
        self,
        df: DataFrame,
        index_column: IntoExpr,
        *,
        every: str | timedelta,
        period: str | timedelta | None,
        offset: str | timedelta | None,
        include_boundaries: bool,
        closed: ClosedInterval,
        label: Label,
        group_by: IntoExpr | Iterable[IntoExpr] | None,
        start_by: StartBy,
    ) -> None:
        # The three durations go through `parse_as_duration_string` before being
        # stored; every other option is kept as given and forwarded to the lazy
        # `group_by_dynamic` call by the methods below.
        every = parse_as_duration_string(every)
        period = parse_as_duration_string(period)
        offset = parse_as_duration_string(offset)

        self.df = df
        self.time_column = index_column
        self.every = every
        self.period = period
        self.offset = offset
        self.label = label
        self.include_boundaries = include_boundaries
        self.closed = closed
        self.group_by = group_by
        self.start_by = start_by

    def __iter__(self) -> Self:
        from polars.lazyframe.opt_flags import QueryOptFlags

        temp_col = "__POLARS_GB_GROUP_INDICES"
        # Collect the result of `agg_groups` per group under a temporary column;
        # `__next__` uses those values to index into `self.df`.
        groups_df = (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .agg(F.first().agg_groups().alias(temp_col))
            .collect(optimizations=QueryOptFlags.none())
        )

        # Every column except the temporary one forms the group name.
        self._group_names = groups_df.select(F.all().exclude(temp_col)).iter_rows()
        self._group_indices = groups_df.select(temp_col).to_series()
        self._current_index = 0

        return self

    def __next__(self) -> tuple[tuple[object, ...], DataFrame]:
        if self._current_index >= len(self._group_indices):
            raise StopIteration

        group_name = next(self._group_names)
        # Select the rows of the current group, keeping all columns.
        group_data = self.df[self._group_indices[self._current_index], :]
        self._current_index += 1

        return group_name, group_data

    def agg(
        self,
        *aggs: IntoExpr | Iterable[IntoExpr],
        **named_aggs: IntoExpr,
    ) -> DataFrame:
        """
        Compute aggregations for each group of a group by operation.

        Parameters
        ----------
        *aggs
            Aggregations to compute for each group of the group by operation,
            specified as positional arguments.
            Accepts expression input. Strings are parsed as column names.
        **named_aggs
            Additional aggregations, specified as keyword arguments.
            The resulting columns will be renamed to the keyword used.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        return (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .agg(*aggs, **named_aggs)
            .collect(optimizations=QueryOptFlags.none())
        )

    def map_groups(
        self,
        function: Callable[[DataFrame], DataFrame],
        schema: SchemaDict | None,
    ) -> DataFrame:
        """
        Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

        Using this is considered an anti-pattern as it will be very slow because:

        - it forces the engine to materialize the whole `DataFrames` for the groups.
        - it is not parallelized.
        - it blocks optimizations as the passed python function is opaque to the
          optimizer.

        The idiomatic way to apply custom functions over multiple columns is using:

        `pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

        Parameters
        ----------
        function
            Function to apply over each group; it receives a DataFrame and
            should return a DataFrame.
        schema
            Schema of the output function. This has to be known statically. If the
            given schema is incorrect, this is a bug in the caller's query and may
            lead to errors. If set to None, polars assumes the schema is unchanged.
        """
        from polars.lazyframe.opt_flags import QueryOptFlags

        return (
            self.df.lazy()
            .group_by_dynamic(
                index_column=self.time_column,
                every=self.every,
                period=self.period,
                offset=self.offset,
                # `label` is forwarded here too, so `__iter__`, `agg` and
                # `map_groups` all build the dynamic group-by from the same
                # options instead of silently dropping the caller's value.
                label=self.label,
                include_boundaries=self.include_boundaries,
                closed=self.closed,
                group_by=self.group_by,
                start_by=self.start_by,
            )
            .map_groups(function, schema)
            .collect(optimizations=QueryOptFlags.none())
        )