polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,669 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Callable
4
+
5
+ from polars import functions as F
6
+ from polars._utils.deprecation import deprecated
7
+ from polars._utils.parse import parse_into_list_of_expressions
8
+ from polars._utils.wrap import wrap_df, wrap_ldf
9
+
10
+ if TYPE_CHECKING:
11
+ import sys
12
+ from collections.abc import Iterable
13
+
14
+ from polars import DataFrame, LazyFrame
15
+ from polars._plr import PyLazyGroupBy
16
+ from polars._typing import IntoExpr, QuantileMethod, SchemaDict
17
+
18
+ if sys.version_info >= (3, 13):
19
+ from warnings import deprecated
20
+ else:
21
+ from typing_extensions import deprecated # noqa: TC004
22
+
23
+
24
+ class LazyGroupBy:
25
+ """
26
+ Utility class for performing a group by operation over a lazy DataFrame.
27
+
28
+ Generated by calling `df.lazy().group_by(...)`.
29
+ """
30
+
31
def __init__(self, lgb: PyLazyGroupBy) -> None:
    """
    Wrap a native lazy group-by handle.

    Parameters
    ----------
    lgb
        The underlying `PyLazyGroupBy` object. It is stored on the instance
        as `self.lgb`, and the other methods of this class call into it.
    """
    self.lgb = lgb
33
+
34
def agg(
    self,
    *aggs: IntoExpr | Iterable[IntoExpr],
    **named_aggs: IntoExpr,
) -> LazyFrame:
    """
    Evaluate one or more aggregations per group.

    Parameters
    ----------
    *aggs
        Aggregations to run for every group, given positionally.
        Expression input is accepted; strings are parsed as column names.
    **named_aggs
        Further aggregations given as keyword arguments. Each resulting
        column is renamed to the keyword that was used.

    Raises
    ------
    TypeError
        If the first positional argument is a dictionary.

    Examples
    --------
    Compute the aggregation of the columns for each group.

    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": ["a", "b", "a", "b", "c"],
    ...         "b": [1, 2, 1, 3, 3],
    ...         "c": [5, 4, 3, 2, 1],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("a").agg(
    ...     [pl.col("b"), pl.col("c")]
    ... ).collect()  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────────┬───────────┐
    │ a   ┆ b         ┆ c         │
    │ --- ┆ ---       ┆ ---       │
    │ str ┆ list[i64] ┆ list[i64] │
    ╞═════╪═══════════╪═══════════╡
    │ a   ┆ [1, 1]    ┆ [5, 3]    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ b   ┆ [2, 3]    ┆ [4, 2]    │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ c   ┆ [3]       ┆ [1]       │
    └─────┴───────────┴───────────┘

    Compute the sum of a column for each group.

    >>> ldf.group_by("a").agg(
    ...     pl.col("b").sum()
    ... ).collect()  # doctest: +IGNORE_RESULT
    shape: (3, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ a   ┆ 2   │
    │ b   ┆ 5   │
    │ c   ┆ 3   │
    └─────┴─────┘

    Compute multiple aggregates at once by passing a list of expressions.

    >>> ldf.group_by("a").agg(
    ...     [pl.sum("b"), pl.mean("c")]
    ... ).collect()  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ f64 │
    ╞═════╪═════╪═════╡
    │ c   ┆ 3   ┆ 1.0 │
    │ a   ┆ 2   ┆ 4.0 │
    │ b   ┆ 5   ┆ 3.0 │
    └─────┴─────┴─────┘

    Or use positional arguments to compute multiple aggregations in the same way.

    >>> ldf.group_by("a").agg(
    ...     pl.sum("b").name.suffix("_sum"),
    ...     (pl.col("c") ** 2).mean().name.suffix("_mean_squared"),
    ... ).collect()  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────┬────────────────┐
    │ a   ┆ b_sum ┆ c_mean_squared │
    │ --- ┆ ---   ┆ ---            │
    │ str ┆ i64   ┆ f64            │
    ╞═════╪═══════╪════════════════╡
    │ a   ┆ 2     ┆ 17.0           │
    │ c   ┆ 3     ┆ 1.0            │
    │ b   ┆ 5     ┆ 10.0           │
    └─────┴───────┴────────────────┘

    Use keyword arguments to easily name your expression inputs.

    >>> ldf.group_by("a").agg(
    ...     b_sum=pl.sum("b"),
    ...     c_mean_squared=(pl.col("c") ** 2).mean(),
    ... ).collect()  # doctest: +IGNORE_RESULT
    shape: (3, 3)
    ┌─────┬───────┬────────────────┐
    │ a   ┆ b_sum ┆ c_mean_squared │
    │ --- ┆ ---   ┆ ---            │
    │ str ┆ i64   ┆ f64            │
    ╞═════╪═══════╪════════════════╡
    │ a   ┆ 2     ┆ 17.0           │
    │ c   ┆ 3     ┆ 1.0            │
    │ b   ┆ 5     ┆ 10.0           │
    └─────┴───────┴────────────────┘
    """
    # Only the first positional argument is inspected: a dict there is
    # rejected up front with a hint to unpack it into keyword arguments.
    leading = aggs[0] if aggs else None
    if isinstance(leading, dict):
        msg = (
            "specifying aggregations as a dictionary is not supported"
            "\n\nTry unpacking the dictionary to take advantage of the keyword syntax"
            " of the `agg` method."
        )
        raise TypeError(msg)

    parsed = parse_into_list_of_expressions(*aggs, **named_aggs)
    aggregated = self.lgb.agg(parsed)
    return wrap_ldf(aggregated)
155
+
156
def map_groups(
    self,
    function: Callable[[DataFrame], DataFrame],
    schema: SchemaDict | None,
) -> LazyFrame:
    """
    Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

    .. warning::
        This method is much slower than the native expressions API.
        Only use it if you cannot implement your logic otherwise.

    Using this is considered an anti-pattern as it will be very slow because:

    - it forces the engine to materialize the whole `DataFrames` for the groups.
    - it is not parallelized
    - it blocks optimizations as the passed python function is opaque to the
      optimizer

    The idiomatic way to apply custom functions over multiple columns is using:

    `pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

    Parameters
    ----------
    function
        Function to apply over each group of the `LazyFrame`. It is called
        with a `DataFrame` and its return value must be a `DataFrame`
        (the result's `_df` attribute is handed back to the engine).
    schema
        Schema of the output function. This has to be known statically. If the
        given schema is incorrect, this is a bug in the caller's query and may
        lead to errors. If set to None, polars assumes the schema is unchanged.

    Examples
    --------
    For each color group sample two rows:

    >>> df = pl.DataFrame(
    ...     {
    ...         "id": [0, 1, 2, 3, 4],
    ...         "color": ["red", "green", "green", "red", "red"],
    ...         "shape": ["square", "triangle", "square", "triangle", "square"],
    ...     }
    ... )
    >>> (
    ...     df.lazy()
    ...     .group_by("color")
    ...     .map_groups(lambda group_df: group_df.sample(2), schema=None)
    ...     .collect()
    ... )  # doctest: +IGNORE_RESULT
    shape: (4, 3)
    ┌─────┬───────┬──────────┐
    │ id  ┆ color ┆ shape    │
    │ --- ┆ ---   ┆ ---      │
    │ i64 ┆ str   ┆ str      │
    ╞═════╪═══════╪══════════╡
    │ 1   ┆ green ┆ triangle │
    │ 2   ┆ green ┆ square   │
    │ 4   ┆ red   ┆ square   │
    │ 3   ┆ red   ┆ triangle │
    └─────┴───────┴──────────┘

    It is better to implement this with an expression:

    >>> df.lazy().filter(
    ...     pl.int_range(pl.len()).shuffle().over("color") < 2
    ... ).collect()  # doctest: +IGNORE_RESULT
    """

    def _run_on_group(raw_group):  # type: ignore[no-untyped-def]
        # Wrap the object passed in by `self.lgb.map_groups` with `wrap_df`
        # before calling the user function, then return the `_df` attribute
        # of whatever the user function produced.
        return function(wrap_df(raw_group))._df

    return wrap_ldf(self.lgb.map_groups(_run_on_group, schema))
226
+
227
def head(self, n: int = 5) -> LazyFrame:
    """
    Get the first `n` rows of each group.

    Parameters
    ----------
    n
        Number of rows to return.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "letters": ["c", "c", "a", "c", "a", "b"],
    ...         "nrs": [1, 2, 3, 4, 5, 6],
    ...     }
    ... )
    >>> df
    shape: (6, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    │ a       ┆ 3   │
    │ c       ┆ 4   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    └─────────┴─────┘
    >>> df.lazy().group_by("letters").head(2).sort("letters").collect()
    shape: (5, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ a       ┆ 3   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    └─────────┴─────┘
    """
    # Delegate to the native group-by handle and re-wrap its result.
    leading_rows = self.lgb.head(n)
    return wrap_ldf(leading_rows)
273
+
274
def tail(self, n: int = 5) -> LazyFrame:
    """
    Get the last `n` rows of each group.

    Parameters
    ----------
    n
        Number of rows to return.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "letters": ["c", "c", "a", "c", "a", "b"],
    ...         "nrs": [1, 2, 3, 4, 5, 6],
    ...     }
    ... )
    >>> df
    shape: (6, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ c       ┆ 1   │
    │ c       ┆ 2   │
    │ a       ┆ 3   │
    │ c       ┆ 4   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    └─────────┴─────┘
    >>> df.lazy().group_by("letters").tail(2).sort("letters").collect()
    shape: (5, 2)
    ┌─────────┬─────┐
    │ letters ┆ nrs │
    │ ---     ┆ --- │
    │ str     ┆ i64 │
    ╞═════════╪═════╡
    │ a       ┆ 3   │
    │ a       ┆ 5   │
    │ b       ┆ 6   │
    │ c       ┆ 2   │
    │ c       ┆ 4   │
    └─────────┴─────┘
    """
    # Delegate to the native group-by handle and re-wrap its result.
    trailing_rows = self.lgb.tail(n)
    return wrap_ldf(trailing_rows)
320
+
321
def all(self) -> LazyFrame:
    """
    Collect the values of every group into Series.

    Shorthand for `agg(F.all())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": ["one", "two", "one", "two"],
    ...         "b": [1, 2, 3, 4],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("a", maintain_order=True).all().collect()
    shape: (2, 2)
    ┌─────┬───────────┐
    │ a   ┆ b         │
    │ --- ┆ ---       │
    │ str ┆ list[i64] │
    ╞═════╪═══════════╡
    │ one ┆ [1, 3]    │
    │ two ┆ [2, 4]    │
    └─────┴───────────┘
    """
    every_column = F.all()
    return self.agg(every_column)
345
+
346
def len(self, name: str | None = None) -> LazyFrame:
    """
    Return the number of rows in each group.

    Parameters
    ----------
    name
        Name to give the resulting column. When left as None the length
        expression is not aliased, and the column is called "len".

    Examples
    --------
    >>> lf = pl.LazyFrame({"a": ["Apple", "Apple", "Orange"], "b": [1, None, 2]})
    >>> lf.group_by("a").len().collect()  # doctest: +IGNORE_RESULT
    shape: (2, 2)
    ┌────────┬─────┐
    │ a      ┆ len │
    │ ---    ┆ --- │
    │ str    ┆ u32 │
    ╞════════╪═════╡
    │ Apple  ┆ 2   │
    │ Orange ┆ 1   │
    └────────┴─────┘
    >>> lf.group_by("a").len(name="n").collect()  # doctest: +IGNORE_RESULT
    shape: (2, 2)
    ┌────────┬─────┐
    │ a      ┆ n   │
    │ ---    ┆ --- │
    │ str    ┆ u32 │
    ╞════════╪═════╡
    │ Apple  ┆ 2   │
    │ Orange ┆ 1   │
    └────────┴─────┘
    """
    row_count = F.len()
    # Alias only when the caller asked for a specific column name.
    named_count = row_count if name is None else row_count.alias(name)
    return self.agg(named_count)
383
+
384
@deprecated("`count` was renamed; use `len` instead")
def count(self) -> LazyFrame:
    """
    Return the number of rows in each group.

    .. deprecated:: 0.20.5
        This method has been renamed to :func:`LazyGroupBy.len`.

    Rows containing null values count towards the total.

    Examples
    --------
    >>> lf = pl.LazyFrame(
    ...     {
    ...         "a": ["Apple", "Apple", "Orange"],
    ...         "b": [1, None, 2],
    ...     }
    ... )
    >>> lf.group_by("a").count().collect()  # doctest: +SKIP
    shape: (2, 2)
    ┌────────┬───────┐
    │ a      ┆ count │
    │ ---    ┆ ---   │
    │ str    ┆ u32   │
    ╞════════╪═══════╡
    │ Apple  ┆ 2     │
    │ Orange ┆ 1     │
    └────────┴───────┘
    """
    # Same expression as `len`, but the output column is always "count".
    counted = F.len().alias("count")
    return self.agg(counted)
414
+
415
def first(self) -> LazyFrame:
    """
    Take the first value of each column within every group.

    Shorthand for `agg(F.all().first())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).first().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 1   ┆ 0.5  ┆ true  │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 4   ┆ 13.0 ┆ false │
    └────────┴─────┴──────┴───────┘
    """
    first_values = F.all().first()
    return self.agg(first_values)
442
+
443
def last(self) -> LazyFrame:
    """
    Take the last value of each column within every group.

    Shorthand for `agg(F.all().last())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 14, 13],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).last().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 3   ┆ 10.0 ┆ false │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 5   ┆ 13.0 ┆ true  │
    └────────┴─────┴──────┴───────┘
    """
    last_values = F.all().last()
    return self.agg(last_values)
470
+
471
def max(self) -> LazyFrame:
    """
    Reduce every group to the maximum of each column.

    Shorthand for `agg(F.all().max())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).max().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────┬──────┐
    │ d      ┆ a   ┆ b    ┆ c    │
    │ ---    ┆ --- ┆ ---  ┆ ---  │
    │ str    ┆ i64 ┆ f64  ┆ bool │
    ╞════════╪═════╪══════╪══════╡
    │ Apple  ┆ 3   ┆ 10.0 ┆ true │
    │ Orange ┆ 2   ┆ 0.5  ┆ true │
    │ Banana ┆ 5   ┆ 14.0 ┆ true │
    └────────┴─────┴──────┴──────┘
    """
    maxima = F.all().max()
    return self.agg(maxima)
498
+
499
def mean(self) -> LazyFrame:
    """
    Reduce every group to the mean of each column.

    Shorthand for `agg(F.all().mean())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).mean().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────────┬──────────┐
    │ d      ┆ a   ┆ b        ┆ c        │
    │ ---    ┆ --- ┆ ---      ┆ ---      │
    │ str    ┆ f64 ┆ f64      ┆ f64      │
    ╞════════╪═════╪══════════╪══════════╡
    │ Apple  ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
    │ Orange ┆ 2.0 ┆ 0.5      ┆ 1.0      │
    │ Banana ┆ 4.5 ┆ 13.5     ┆ 0.5      │
    └────────┴─────┴──────────┴──────────┘
    """
    averages = F.all().mean()
    return self.agg(averages)
526
+
527
def median(self) -> LazyFrame:
    """
    Reduce every group to the median of each column.

    Shorthand for `agg(F.all().median())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).median().collect()
    shape: (2, 3)
    ┌────────┬─────┬──────┐
    │ d      ┆ a   ┆ b    │
    │ ---    ┆ --- ┆ ---  │
    │ str    ┆ f64 ┆ f64  │
    ╞════════╪═════╪══════╡
    │ Apple  ┆ 2.0 ┆ 4.0  │
    │ Banana ┆ 4.0 ┆ 13.0 │
    └────────┴─────┴──────┘
    """
    medians = F.all().median()
    return self.agg(medians)
552
+
553
def min(self) -> LazyFrame:
    """
    Reduce every group to the minimum of each column.

    Shorthand for `agg(F.all().min())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).min().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────┬───────┐
    │ d      ┆ a   ┆ b    ┆ c     │
    │ ---    ┆ --- ┆ ---  ┆ ---   │
    │ str    ┆ i64 ┆ f64  ┆ bool  │
    ╞════════╪═════╪══════╪═══════╡
    │ Apple  ┆ 1   ┆ 0.5  ┆ false │
    │ Orange ┆ 2   ┆ 0.5  ┆ true  │
    │ Banana ┆ 4   ┆ 13.0 ┆ false │
    └────────┴─────┴──────┴───────┘
    """
    minima = F.all().min()
    return self.agg(minima)
580
+
581
def n_unique(self) -> LazyFrame:
    """
    Count the distinct values of each column within every group.

    Shorthand for `agg(F.all().n_unique())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 1, 3, 4, 5],
    ...         "b": [0.5, 0.5, 0.5, 10, 13, 14],
    ...         "d": ["Apple", "Banana", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).n_unique().collect()
    shape: (2, 3)
    ┌────────┬─────┬─────┐
    │ d      ┆ a   ┆ b   │
    │ ---    ┆ --- ┆ --- │
    │ str    ┆ u32 ┆ u32 │
    ╞════════╪═════╪═════╡
    │ Apple  ┆ 2   ┆ 2   │
    │ Banana ┆ 3   ┆ 3   │
    └────────┴─────┴─────┘
    """
    distinct_counts = F.all().n_unique()
    return self.agg(distinct_counts)
606
+
607
def quantile(
    self, quantile: float, interpolation: QuantileMethod = "nearest"
) -> LazyFrame:
    """
    Compute the requested quantile of each column within every group.

    Shorthand for
    `agg(F.all().quantile(quantile, interpolation=interpolation))`.

    Parameters
    ----------
    quantile
        Quantile between 0.0 and 1.0.
    interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
        Interpolation method.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).quantile(1).collect()
    shape: (3, 3)
    ┌────────┬─────┬──────┐
    │ d      ┆ a   ┆ b    │
    │ ---    ┆ --- ┆ ---  │
    │ str    ┆ f64 ┆ f64  │
    ╞════════╪═════╪══════╡
    │ Apple  ┆ 3.0 ┆ 10.0 │
    │ Orange ┆ 2.0 ┆ 0.5  │
    │ Banana ┆ 5.0 ┆ 14.0 │
    └────────┴─────┴──────┘
    """  # noqa: W505
    per_group_quantile = F.all().quantile(quantile, interpolation=interpolation)
    return self.agg(per_group_quantile)
642
+
643
def sum(self) -> LazyFrame:
    """
    Reduce every group to the sum of each column.

    Shorthand for `agg(F.all().sum())`.

    Examples
    --------
    >>> ldf = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 2, 3, 4, 5],
    ...         "b": [0.5, 0.5, 4, 10, 13, 14],
    ...         "c": [True, True, True, False, False, True],
    ...         "d": ["Apple", "Orange", "Apple", "Apple", "Banana", "Banana"],
    ...     }
    ... ).lazy()
    >>> ldf.group_by("d", maintain_order=True).sum().collect()
    shape: (3, 4)
    ┌────────┬─────┬──────┬─────┐
    │ d      ┆ a   ┆ b    ┆ c   │
    │ ---    ┆ --- ┆ ---  ┆ --- │
    │ str    ┆ i64 ┆ f64  ┆ u32 │
    ╞════════╪═════╪══════╪═════╡
    │ Apple  ┆ 6   ┆ 14.5 ┆ 2   │
    │ Orange ┆ 2   ┆ 0.5  ┆ 1   │
    │ Banana ┆ 9   ┆ 27.0 ┆ 1   │
    └────────┴─────┴──────┴─────┘
    """
    totals = F.all().sum()
    return self.agg(totals)