polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b4__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -96
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/expr/list.py DELETED
@@ -1,1408 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import copy
4
- from collections.abc import Collection, Sequence
5
- from typing import TYPE_CHECKING, Any, Callable
6
-
7
- import polars._reexport as pl
8
- from polars import exceptions
9
- from polars import functions as F
10
- from polars._utils.parse import parse_into_expression
11
- from polars._utils.various import issue_warning
12
- from polars._utils.wrap import wrap_expr
13
-
14
- if TYPE_CHECKING:
15
- from polars import Expr, Series
16
- from polars._typing import (
17
- IntoExpr,
18
- IntoExprColumn,
19
- ListToStructWidthStrategy,
20
- NullBehavior,
21
- )
22
-
23
-
24
- class ExprListNameSpace:
25
- """Namespace for list related expressions."""
26
-
27
- _accessor = "list"
28
-
29
- def __init__(self, expr: Expr) -> None:
30
- self._pyexpr = expr._pyexpr
31
-
32
- def __getitem__(self, item: int) -> Expr:
33
- return self.get(item)
34
-
35
- def all(self) -> Expr:
36
- """
37
- Evaluate whether all boolean values in a list are true.
38
-
39
- Notes
40
- -----
41
- If there are no non-null elements in a row, the output is `True`.
42
-
43
- Examples
44
- --------
45
- >>> df = pl.DataFrame(
46
- ... {"a": [[True, True], [False, True], [False, False], [None], [], None]}
47
- ... )
48
- >>> df.with_columns(all=pl.col("a").list.all())
49
- shape: (6, 2)
50
- ┌────────────────┬───────┐
51
- │ a ┆ all │
52
- │ --- ┆ --- │
53
- │ list[bool] ┆ bool │
54
- ╞════════════════╪═══════╡
55
- │ [true, true] ┆ true │
56
- │ [false, true] ┆ false │
57
- │ [false, false] ┆ false │
58
- │ [null] ┆ true │
59
- │ [] ┆ true │
60
- │ null ┆ null │
61
- └────────────────┴───────┘
62
- """
63
- return wrap_expr(self._pyexpr.list_all())
64
-
65
- def any(self) -> Expr:
66
- """
67
- Evaluate whether any boolean value in a list is true.
68
-
69
- Notes
70
- -----
71
- If there are no non-null elements in a row, the output is `False`.
72
-
73
- Examples
74
- --------
75
- >>> df = pl.DataFrame(
76
- ... {"a": [[True, True], [False, True], [False, False], [None], [], None]}
77
- ... )
78
- >>> df.with_columns(any=pl.col("a").list.any())
79
- shape: (6, 2)
80
- ┌────────────────┬───────┐
81
- │ a ┆ any │
82
- │ --- ┆ --- │
83
- │ list[bool] ┆ bool │
84
- ╞════════════════╪═══════╡
85
- │ [true, true] ┆ true │
86
- │ [false, true] ┆ true │
87
- │ [false, false] ┆ false │
88
- │ [null] ┆ false │
89
- │ [] ┆ false │
90
- │ null ┆ null │
91
- └────────────────┴───────┘
92
- """
93
- return wrap_expr(self._pyexpr.list_any())
94
-
95
- def len(self) -> Expr:
96
- """
97
- Return the number of elements in each list.
98
-
99
- Null values count towards the total.
100
-
101
- Returns
102
- -------
103
- Expr
104
- Expression of data type :class:`UInt32`.
105
-
106
- Examples
107
- --------
108
- >>> df = pl.DataFrame({"a": [[1, 2, None], [5]]})
109
- >>> df.with_columns(len=pl.col("a").list.len())
110
- shape: (2, 2)
111
- ┌──────────────┬─────┐
112
- │ a ┆ len │
113
- │ --- ┆ --- │
114
- │ list[i64] ┆ u32 │
115
- ╞══════════════╪═════╡
116
- │ [1, 2, null] ┆ 3 │
117
- │ [5] ┆ 1 │
118
- └──────────────┴─────┘
119
- """
120
- return wrap_expr(self._pyexpr.list_len())
121
-
122
- def drop_nulls(self) -> Expr:
123
- """
124
- Drop all null values in the list.
125
-
126
- The original order of the remaining elements is preserved.
127
-
128
- Examples
129
- --------
130
- >>> df = pl.DataFrame({"values": [[None, 1, None, 2], [None], [3, 4]]})
131
- >>> df.with_columns(drop_nulls=pl.col("values").list.drop_nulls())
132
- shape: (3, 2)
133
- ┌────────────────┬────────────┐
134
- │ values ┆ drop_nulls │
135
- │ --- ┆ --- │
136
- │ list[i64] ┆ list[i64] │
137
- ╞════════════════╪════════════╡
138
- │ [null, 1, … 2] ┆ [1, 2] │
139
- │ [null] ┆ [] │
140
- │ [3, 4] ┆ [3, 4] │
141
- └────────────────┴────────────┘
142
- """
143
- return wrap_expr(self._pyexpr.list_drop_nulls())
144
-
145
- def sample(
146
- self,
147
- n: int | IntoExprColumn | None = None,
148
- *,
149
- fraction: float | IntoExprColumn | None = None,
150
- with_replacement: bool = False,
151
- shuffle: bool = False,
152
- seed: int | None = None,
153
- ) -> Expr:
154
- """
155
- Sample from this list.
156
-
157
- Parameters
158
- ----------
159
- n
160
- Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
161
- `fraction` is None.
162
- fraction
163
- Fraction of items to return. Cannot be used with `n`.
164
- with_replacement
165
- Allow values to be sampled more than once.
166
- shuffle
167
- Shuffle the order of sampled data points.
168
- seed
169
- Seed for the random number generator. If set to None (default), a
170
- random seed is generated for each sample operation.
171
-
172
- Examples
173
- --------
174
- >>> df = pl.DataFrame({"values": [[1, 2, 3], [4, 5]], "n": [2, 1]})
175
- >>> df.with_columns(sample=pl.col("values").list.sample(n=pl.col("n"), seed=1))
176
- shape: (2, 3)
177
- ┌───────────┬─────┬───────────┐
178
- │ values ┆ n ┆ sample │
179
- │ --- ┆ --- ┆ --- │
180
- │ list[i64] ┆ i64 ┆ list[i64] │
181
- ╞═══════════╪═════╪═══════════╡
182
- │ [1, 2, 3] ┆ 2 ┆ [2, 3] │
183
- │ [4, 5] ┆ 1 ┆ [5] │
184
- └───────────┴─────┴───────────┘
185
- """
186
- if n is not None and fraction is not None:
187
- msg = "cannot specify both `n` and `fraction`"
188
- raise ValueError(msg)
189
-
190
- if fraction is not None:
191
- fraction_pyexpr = parse_into_expression(fraction)
192
- return wrap_expr(
193
- self._pyexpr.list_sample_fraction(
194
- fraction_pyexpr, with_replacement, shuffle, seed
195
- )
196
- )
197
-
198
- if n is None:
199
- n = 1
200
- n_pyexpr = parse_into_expression(n)
201
- return wrap_expr(
202
- self._pyexpr.list_sample_n(n_pyexpr, with_replacement, shuffle, seed)
203
- )
204
-
205
- def sum(self) -> Expr:
206
- """
207
- Sum all the lists in the array.
208
-
209
- Notes
210
- -----
211
- If there are no non-null elements in a row, the output is `0`.
212
-
213
- Examples
214
- --------
215
- >>> df = pl.DataFrame({"values": [[1], [2, 3]]})
216
- >>> df.with_columns(sum=pl.col("values").list.sum())
217
- shape: (2, 2)
218
- ┌───────────┬─────┐
219
- │ values ┆ sum │
220
- │ --- ┆ --- │
221
- │ list[i64] ┆ i64 │
222
- ╞═══════════╪═════╡
223
- │ [1] ┆ 1 │
224
- │ [2, 3] ┆ 5 │
225
- └───────────┴─────┘
226
- """
227
- return wrap_expr(self._pyexpr.list_sum())
228
-
229
- def max(self) -> Expr:
230
- """
231
- Compute the max value of the lists in the array.
232
-
233
- Examples
234
- --------
235
- >>> df = pl.DataFrame({"values": [[1], [2, 3]]})
236
- >>> df.with_columns(max=pl.col("values").list.max())
237
- shape: (2, 2)
238
- ┌───────────┬─────┐
239
- │ values ┆ max │
240
- │ --- ┆ --- │
241
- │ list[i64] ┆ i64 │
242
- ╞═══════════╪═════╡
243
- │ [1] ┆ 1 │
244
- │ [2, 3] ┆ 3 │
245
- └───────────┴─────┘
246
- """
247
- return wrap_expr(self._pyexpr.list_max())
248
-
249
- def min(self) -> Expr:
250
- """
251
- Compute the min value of the lists in the array.
252
-
253
- Examples
254
- --------
255
- >>> df = pl.DataFrame({"values": [[1], [2, 3]]})
256
- >>> df.with_columns(min=pl.col("values").list.min())
257
- shape: (2, 2)
258
- ┌───────────┬─────┐
259
- │ values ┆ min │
260
- │ --- ┆ --- │
261
- │ list[i64] ┆ i64 │
262
- ╞═══════════╪═════╡
263
- │ [1] ┆ 1 │
264
- │ [2, 3] ┆ 2 │
265
- └───────────┴─────┘
266
- """
267
- return wrap_expr(self._pyexpr.list_min())
268
-
269
- def mean(self) -> Expr:
270
- """
271
- Compute the mean value of the lists in the array.
272
-
273
- Examples
274
- --------
275
- >>> df = pl.DataFrame({"values": [[1], [2, 3]]})
276
- >>> df.with_columns(mean=pl.col("values").list.mean())
277
- shape: (2, 2)
278
- ┌───────────┬──────┐
279
- │ values ┆ mean │
280
- │ --- ┆ --- │
281
- │ list[i64] ┆ f64 │
282
- ╞═══════════╪══════╡
283
- │ [1] ┆ 1.0 │
284
- │ [2, 3] ┆ 2.5 │
285
- └───────────┴──────┘
286
- """
287
- return wrap_expr(self._pyexpr.list_mean())
288
-
289
- def median(self) -> Expr:
290
- """
291
- Compute the median value of the lists in the array.
292
-
293
- Examples
294
- --------
295
- >>> df = pl.DataFrame({"values": [[-1, 0, 1], [1, 10]]})
296
- >>> df.with_columns(pl.col("values").list.median().alias("median"))
297
- shape: (2, 2)
298
- ┌────────────┬────────┐
299
- │ values ┆ median │
300
- │ --- ┆ --- │
301
- │ list[i64] ┆ f64 │
302
- ╞════════════╪════════╡
303
- │ [-1, 0, 1] ┆ 0.0 │
304
- │ [1, 10] ┆ 5.5 │
305
- └────────────┴────────┘
306
- """
307
- return wrap_expr(self._pyexpr.list_median())
308
-
309
- def std(self, ddof: int = 1) -> Expr:
310
- """
311
- Compute the std value of the lists in the array.
312
-
313
- Parameters
314
- ----------
315
- ddof
316
- “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
317
- where N represents the number of elements.
318
- By default ddof is 1.
319
-
320
- Examples
321
- --------
322
- >>> df = pl.DataFrame({"values": [[-1, 0, 1], [1, 10]]})
323
- >>> df.with_columns(pl.col("values").list.std().alias("std"))
324
- shape: (2, 2)
325
- ┌────────────┬──────────┐
326
- │ values ┆ std │
327
- │ --- ┆ --- │
328
- │ list[i64] ┆ f64 │
329
- ╞════════════╪══════════╡
330
- │ [-1, 0, 1] ┆ 1.0 │
331
- │ [1, 10] ┆ 6.363961 │
332
- └────────────┴──────────┘
333
- """
334
- return wrap_expr(self._pyexpr.list_std(ddof))
335
-
336
- def var(self, ddof: int = 1) -> Expr:
337
- """
338
- Compute the var value of the lists in the array.
339
-
340
- Parameters
341
- ----------
342
- ddof
343
- “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
344
- where N represents the number of elements.
345
- By default ddof is 1.
346
-
347
- Examples
348
- --------
349
- >>> df = pl.DataFrame({"values": [[-1, 0, 1], [1, 10]]})
350
- >>> df.with_columns(pl.col("values").list.var().alias("var"))
351
- shape: (2, 2)
352
- ┌────────────┬──────┐
353
- │ values ┆ var │
354
- │ --- ┆ --- │
355
- │ list[i64] ┆ f64 │
356
- ╞════════════╪══════╡
357
- │ [-1, 0, 1] ┆ 1.0 │
358
- │ [1, 10] ┆ 40.5 │
359
- └────────────┴──────┘
360
- """
361
- return wrap_expr(self._pyexpr.list_var(ddof))
362
-
363
- def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
364
- """
365
- Sort the lists in this column.
366
-
367
- Parameters
368
- ----------
369
- descending
370
- Sort in descending order.
371
- nulls_last
372
- Place null values last.
373
-
374
- Examples
375
- --------
376
- >>> df = pl.DataFrame(
377
- ... {
378
- ... "a": [[3, 2, 1], [9, 1, 2]],
379
- ... }
380
- ... )
381
- >>> df.with_columns(sort=pl.col("a").list.sort())
382
- shape: (2, 2)
383
- ┌───────────┬───────────┐
384
- │ a ┆ sort │
385
- │ --- ┆ --- │
386
- │ list[i64] ┆ list[i64] │
387
- ╞═══════════╪═══════════╡
388
- │ [3, 2, 1] ┆ [1, 2, 3] │
389
- │ [9, 1, 2] ┆ [1, 2, 9] │
390
- └───────────┴───────────┘
391
- >>> df.with_columns(sort=pl.col("a").list.sort(descending=True))
392
- shape: (2, 2)
393
- ┌───────────┬───────────┐
394
- │ a ┆ sort │
395
- │ --- ┆ --- │
396
- │ list[i64] ┆ list[i64] │
397
- ╞═══════════╪═══════════╡
398
- │ [3, 2, 1] ┆ [3, 2, 1] │
399
- │ [9, 1, 2] ┆ [9, 2, 1] │
400
- └───────────┴───────────┘
401
- """
402
- return wrap_expr(self._pyexpr.list_sort(descending, nulls_last))
403
-
404
- def reverse(self) -> Expr:
405
- """
406
- Reverse the arrays in the list.
407
-
408
- Examples
409
- --------
410
- >>> df = pl.DataFrame(
411
- ... {
412
- ... "a": [[3, 2, 1], [9, 1, 2]],
413
- ... }
414
- ... )
415
- >>> df.with_columns(reverse=pl.col("a").list.reverse())
416
- shape: (2, 2)
417
- ┌───────────┬───────────┐
418
- │ a ┆ reverse │
419
- │ --- ┆ --- │
420
- │ list[i64] ┆ list[i64] │
421
- ╞═══════════╪═══════════╡
422
- │ [3, 2, 1] ┆ [1, 2, 3] │
423
- │ [9, 1, 2] ┆ [2, 1, 9] │
424
- └───────────┴───────────┘
425
- """
426
- return wrap_expr(self._pyexpr.list_reverse())
427
-
428
- def unique(self, *, maintain_order: bool = False) -> Expr:
429
- """
430
- Get the unique/distinct values in the list.
431
-
432
- Parameters
433
- ----------
434
- maintain_order
435
- Maintain order of data. This requires more work.
436
-
437
- Examples
438
- --------
439
- >>> df = pl.DataFrame(
440
- ... {
441
- ... "a": [[1, 1, 2]],
442
- ... }
443
- ... )
444
- >>> df.with_columns(unique=pl.col("a").list.unique())
445
- shape: (1, 2)
446
- ┌───────────┬───────────┐
447
- │ a ┆ unique │
448
- │ --- ┆ --- │
449
- │ list[i64] ┆ list[i64] │
450
- ╞═══════════╪═══════════╡
451
- │ [1, 1, 2] ┆ [1, 2] │
452
- └───────────┴───────────┘
453
- """
454
- return wrap_expr(self._pyexpr.list_unique(maintain_order))
455
-
456
- def n_unique(self) -> Expr:
457
- """
458
- Count the number of unique values in every sub-lists.
459
-
460
- Examples
461
- --------
462
- >>> df = pl.DataFrame(
463
- ... {
464
- ... "a": [[1, 1, 2], [2, 3, 4]],
465
- ... }
466
- ... )
467
- >>> df.with_columns(n_unique=pl.col("a").list.n_unique())
468
- shape: (2, 2)
469
- ┌───────────┬──────────┐
470
- │ a ┆ n_unique │
471
- │ --- ┆ --- │
472
- │ list[i64] ┆ u32 │
473
- ╞═══════════╪══════════╡
474
- │ [1, 1, 2] ┆ 2 │
475
- │ [2, 3, 4] ┆ 3 │
476
- └───────────┴──────────┘
477
- """
478
- return wrap_expr(self._pyexpr.list_n_unique())
479
-
480
- def concat(self, other: list[Expr | str] | Expr | str | Series | list[Any]) -> Expr:
481
- """
482
- Concat the arrays in a Series dtype List in linear time.
483
-
484
- Parameters
485
- ----------
486
- other
487
- Columns to concat into a List Series
488
-
489
- Examples
490
- --------
491
- >>> df = pl.DataFrame(
492
- ... {
493
- ... "a": [["a"], ["x"]],
494
- ... "b": [["b", "c"], ["y", "z"]],
495
- ... }
496
- ... )
497
- >>> df.with_columns(concat=pl.col("a").list.concat("b"))
498
- shape: (2, 3)
499
- ┌───────────┬────────────┬─────────────────┐
500
- │ a ┆ b ┆ concat │
501
- │ --- ┆ --- ┆ --- │
502
- │ list[str] ┆ list[str] ┆ list[str] │
503
- ╞═══════════╪════════════╪═════════════════╡
504
- │ ["a"] ┆ ["b", "c"] ┆ ["a", "b", "c"] │
505
- │ ["x"] ┆ ["y", "z"] ┆ ["x", "y", "z"] │
506
- └───────────┴────────────┴─────────────────┘
507
- """
508
- if isinstance(other, list) and (
509
- not isinstance(other[0], (pl.Expr, str, pl.Series))
510
- ):
511
- return self.concat(pl.Series([other]))
512
-
513
- other_list: list[Expr | str | Series]
514
- other_list = [other] if not isinstance(other, list) else copy.copy(other) # type: ignore[arg-type]
515
-
516
- other_list.insert(0, wrap_expr(self._pyexpr))
517
- return F.concat_list(other_list)
518
-
519
- def get(
520
- self,
521
- index: int | Expr | str,
522
- *,
523
- null_on_oob: bool = False,
524
- ) -> Expr:
525
- """
526
- Get the value by index in the sublists.
527
-
528
- So index `0` would return the first item of every sublist
529
- and index `-1` would return the last item of every sublist
530
- if an index is out of bounds, it will return a `None`.
531
-
532
- Parameters
533
- ----------
534
- index
535
- Index to return per sublist
536
- null_on_oob
537
- Behavior if an index is out of bounds:
538
-
539
- * True -> set as null
540
- * False -> raise an error
541
-
542
- Examples
543
- --------
544
- >>> df = pl.DataFrame({"a": [[3, 2, 1], [], [1, 2]]})
545
- >>> df.with_columns(get=pl.col("a").list.get(0, null_on_oob=True))
546
- shape: (3, 2)
547
- ┌───────────┬──────┐
548
- │ a ┆ get │
549
- │ --- ┆ --- │
550
- │ list[i64] ┆ i64 │
551
- ╞═══════════╪══════╡
552
- │ [3, 2, 1] ┆ 3 │
553
- │ [] ┆ null │
554
- │ [1, 2] ┆ 1 │
555
- └───────────┴──────┘
556
- """
557
- index_pyexpr = parse_into_expression(index)
558
- return wrap_expr(self._pyexpr.list_get(index_pyexpr, null_on_oob))
559
-
560
- def gather(
561
- self,
562
- indices: Expr | Series | list[int] | list[list[int]],
563
- *,
564
- null_on_oob: bool = False,
565
- ) -> Expr:
566
- """
567
- Take sublists by multiple indices.
568
-
569
- The indices may be defined in a single column, or by sublists in another
570
- column of dtype `List`.
571
-
572
- Parameters
573
- ----------
574
- indices
575
- Indices to return per sublist
576
- null_on_oob
577
- Behavior if an index is out of bounds:
578
- True -> set as null
579
- False -> raise an error
580
- Note that defaulting to raising an error is much cheaper
581
-
582
- Examples
583
- --------
584
- >>> df = pl.DataFrame({"a": [[3, 2, 1], [], [1, 2, 3, 4, 5]]})
585
- >>> df.with_columns(gather=pl.col("a").list.gather([0, 4], null_on_oob=True))
586
- shape: (3, 2)
587
- ┌─────────────┬──────────────┐
588
- │ a ┆ gather │
589
- │ --- ┆ --- │
590
- │ list[i64] ┆ list[i64] │
591
- ╞═════════════╪══════════════╡
592
- │ [3, 2, 1] ┆ [3, null] │
593
- │ [] ┆ [null, null] │
594
- │ [1, 2, … 5] ┆ [1, 5] │
595
- └─────────────┴──────────────┘
596
- """
597
- indices_pyexpr = parse_into_expression(indices)
598
- return wrap_expr(self._pyexpr.list_gather(indices_pyexpr, null_on_oob))
599
-
600
- def gather_every(
601
- self,
602
- n: int | IntoExprColumn,
603
- offset: int | IntoExprColumn = 0,
604
- ) -> Expr:
605
- """
606
- Take every n-th value start from offset in sublists.
607
-
608
- Parameters
609
- ----------
610
- n
611
- Gather every n-th element.
612
- offset
613
- Starting index.
614
-
615
- Examples
616
- --------
617
- >>> df = pl.DataFrame(
618
- ... {
619
- ... "a": [[1, 2, 3, 4, 5], [6, 7, 8], [9, 10, 11, 12]],
620
- ... "n": [2, 1, 3],
621
- ... "offset": [0, 1, 0],
622
- ... }
623
- ... )
624
- >>> df.with_columns(
625
- ... gather_every=pl.col("a").list.gather_every(
626
- ... n=pl.col("n"), offset=pl.col("offset")
627
- ... )
628
- ... )
629
- shape: (3, 4)
630
- ┌───────────────┬─────┬────────┬──────────────┐
631
- │ a ┆ n ┆ offset ┆ gather_every │
632
- │ --- ┆ --- ┆ --- ┆ --- │
633
- │ list[i64] ┆ i64 ┆ i64 ┆ list[i64] │
634
- ╞═══════════════╪═════╪════════╪══════════════╡
635
- │ [1, 2, … 5] ┆ 2 ┆ 0 ┆ [1, 3, 5] │
636
- │ [6, 7, 8] ┆ 1 ┆ 1 ┆ [7, 8] │
637
- │ [9, 10, … 12] ┆ 3 ┆ 0 ┆ [9, 12] │
638
- └───────────────┴─────┴────────┴──────────────┘
639
- """
640
- n_pyexpr = parse_into_expression(n)
641
- offset_pyexpr = parse_into_expression(offset)
642
- return wrap_expr(self._pyexpr.list_gather_every(n_pyexpr, offset_pyexpr))
643
-
644
- def first(self) -> Expr:
645
- """
646
- Get the first value of the sublists.
647
-
648
- Examples
649
- --------
650
- >>> df = pl.DataFrame({"a": [[3, 2, 1], [], [1, 2]]})
651
- >>> df.with_columns(first=pl.col("a").list.first())
652
- shape: (3, 2)
653
- ┌───────────┬───────┐
654
- │ a ┆ first │
655
- │ --- ┆ --- │
656
- │ list[i64] ┆ i64 │
657
- ╞═══════════╪═══════╡
658
- │ [3, 2, 1] ┆ 3 │
659
- │ [] ┆ null │
660
- │ [1, 2] ┆ 1 │
661
- └───────────┴───────┘
662
- """
663
- return self.get(0, null_on_oob=True)
664
-
665
- def last(self) -> Expr:
666
- """
667
- Get the last value of the sublists.
668
-
669
- Examples
670
- --------
671
- >>> df = pl.DataFrame({"a": [[3, 2, 1], [], [1, 2]]})
672
- >>> df.with_columns(last=pl.col("a").list.last())
673
- shape: (3, 2)
674
- ┌───────────┬──────┐
675
- │ a ┆ last │
676
- │ --- ┆ --- │
677
- │ list[i64] ┆ i64 │
678
- ╞═══════════╪══════╡
679
- │ [3, 2, 1] ┆ 1 │
680
- │ [] ┆ null │
681
- │ [1, 2] ┆ 2 │
682
- └───────────┴──────┘
683
- """
684
- return self.get(-1, null_on_oob=True)
685
-
686
- def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr:
687
- """
688
- Check if sublists contain the given item.
689
-
690
- Parameters
691
- ----------
692
- item
693
- Item that will be checked for membership
694
- nulls_equal : bool, default True
695
- If True, treat null as a distinct value. Null values will not propagate.
696
-
697
- Returns
698
- -------
699
- Expr
700
- Expression of data type :class:`Boolean`.
701
-
702
- Examples
703
- --------
704
- >>> df = pl.DataFrame({"a": [[3, 2, 1], [], [1, 2]]})
705
- >>> df.with_columns(contains=pl.col("a").list.contains(1))
706
- shape: (3, 2)
707
- ┌───────────┬──────────┐
708
- │ a ┆ contains │
709
- │ --- ┆ --- │
710
- │ list[i64] ┆ bool │
711
- ╞═══════════╪══════════╡
712
- │ [3, 2, 1] ┆ true │
713
- │ [] ┆ false │
714
- │ [1, 2] ┆ true │
715
- └───────────┴──────────┘
716
- """
717
- item_pyexpr = parse_into_expression(item, str_as_lit=True)
718
- return wrap_expr(self._pyexpr.list_contains(item_pyexpr, nulls_equal))
719
-
720
- def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Expr:
721
- """
722
- Join all string items in a sublist and place a separator between them.
723
-
724
- This errors if inner type of list `!= String`.
725
-
726
- Parameters
727
- ----------
728
- separator
729
- string to separate the items with
730
- ignore_nulls
731
- Ignore null values (default).
732
-
733
- If set to ``False``, null values will be propagated.
734
- If the sub-list contains any null values, the output is ``None``.
735
-
736
- Returns
737
- -------
738
- Expr
739
- Expression of data type :class:`String`.
740
-
741
- Examples
742
- --------
743
- >>> df = pl.DataFrame({"s": [["a", "b", "c"], ["x", "y"]]})
744
- >>> df.with_columns(join=pl.col("s").list.join(" "))
745
- shape: (2, 2)
746
- ┌─────────────────┬───────┐
747
- │ s ┆ join │
748
- │ --- ┆ --- │
749
- │ list[str] ┆ str │
750
- ╞═════════════════╪═══════╡
751
- │ ["a", "b", "c"] ┆ a b c │
752
- │ ["x", "y"] ┆ x y │
753
- └─────────────────┴───────┘
754
-
755
- >>> df = pl.DataFrame(
756
- ... {"s": [["a", "b", "c"], ["x", "y"]], "separator": ["*", "_"]}
757
- ... )
758
- >>> df.with_columns(join=pl.col("s").list.join(pl.col("separator")))
759
- shape: (2, 3)
760
- ┌─────────────────┬───────────┬───────┐
761
- │ s ┆ separator ┆ join │
762
- │ --- ┆ --- ┆ --- │
763
- │ list[str] ┆ str ┆ str │
764
- ╞═════════════════╪═══════════╪═══════╡
765
- │ ["a", "b", "c"] ┆ * ┆ a*b*c │
766
- │ ["x", "y"] ┆ _ ┆ x_y │
767
- └─────────────────┴───────────┴───────┘
768
- """
769
- separator_pyexpr = parse_into_expression(separator, str_as_lit=True)
770
- return wrap_expr(self._pyexpr.list_join(separator_pyexpr, ignore_nulls))
771
-
772
def arg_min(self) -> Expr:
    """
    Return the position of the smallest value within each sublist.

    Returns
    -------
    Expr
        Expression of data type :class:`UInt32` or :class:`UInt64`
        (depending on compilation).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2], [2, 1]],
    ...     }
    ... )
    >>> df.with_columns(arg_min=pl.col("a").list.arg_min())
    shape: (2, 2)
    ┌───────────┬─────────┐
    │ a         ┆ arg_min │
    │ ---       ┆ ---     │
    │ list[i64] ┆ u32     │
    ╞═══════════╪═════════╡
    │ [1, 2]    ┆ 0       │
    │ [2, 1]    ┆ 1       │
    └───────────┴─────────┘
    """
    # Delegate to the underlying expression, then wrap the result as an Expr.
    min_position = self._pyexpr.list_arg_min()
    return wrap_expr(min_position)
801
-
802
def arg_max(self) -> Expr:
    """
    Return the position of the largest value within each sublist.

    Returns
    -------
    Expr
        Expression of data type :class:`UInt32` or :class:`UInt64`
        (depending on compilation).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2], [2, 1]],
    ...     }
    ... )
    >>> df.with_columns(arg_max=pl.col("a").list.arg_max())
    shape: (2, 2)
    ┌───────────┬─────────┐
    │ a         ┆ arg_max │
    │ ---       ┆ ---     │
    │ list[i64] ┆ u32     │
    ╞═══════════╪═════════╡
    │ [1, 2]    ┆ 1       │
    │ [2, 1]    ┆ 0       │
    └───────────┴─────────┘
    """
    # Delegate to the underlying expression, then wrap the result as an Expr.
    max_position = self._pyexpr.list_arg_max()
    return wrap_expr(max_position)
831
-
832
def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Expr:
    """
    Compute the discrete difference between shifted items of each sublist.

    Parameters
    ----------
    n
        Number of slots to shift.
    null_behavior : {'ignore', 'drop'}
        How to handle null values.

    Examples
    --------
    >>> df = pl.DataFrame({"n": [[1, 2, 3, 4], [10, 2, 1]]})
    >>> df.with_columns(diff=pl.col("n").list.diff())
    shape: (2, 2)
    ┌─────────────┬────────────────┐
    │ n           ┆ diff           │
    │ ---         ┆ ---            │
    │ list[i64]   ┆ list[i64]      │
    ╞═════════════╪════════════════╡
    │ [1, 2, … 4] ┆ [null, 1, … 1] │
    │ [10, 2, 1]  ┆ [null, -8, -1] │
    └─────────────┴────────────────┘

    >>> df.with_columns(diff=pl.col("n").list.diff(n=2))
    shape: (2, 2)
    ┌─────────────┬───────────────────┐
    │ n           ┆ diff              │
    │ ---         ┆ ---               │
    │ list[i64]   ┆ list[i64]         │
    ╞═════════════╪═══════════════════╡
    │ [1, 2, … 4] ┆ [null, null, … 2] │
    │ [10, 2, 1]  ┆ [null, null, -9]  │
    └─────────────┴───────────────────┘

    >>> df.with_columns(diff=pl.col("n").list.diff(n=2, null_behavior="drop"))
    shape: (2, 2)
    ┌─────────────┬───────────┐
    │ n           ┆ diff      │
    │ ---         ┆ ---       │
    │ list[i64]   ┆ list[i64] │
    ╞═════════════╪═══════════╡
    │ [1, 2, … 4] ┆ [2, 2]    │
    │ [10, 2, 1]  ┆ [-9]      │
    └─────────────┴───────────┘
    """
    # Both arguments are forwarded unchanged to the underlying expression.
    differenced = self._pyexpr.list_diff(n, null_behavior)
    return wrap_expr(differenced)
880
-
881
def shift(self, n: int | IntoExprColumn = 1) -> Expr:
    """
    Shift the values of each sublist by the given number of indices.

    Parameters
    ----------
    n
        Number of indices to shift forward. If a negative value is passed, values
        are shifted in the opposite direction instead.

    Notes
    -----
    This method is similar to the `LAG` operation in SQL when the value for `n`
    is positive. With a negative value for `n`, it is similar to `LEAD`.

    Examples
    --------
    By default, list values are shifted forward by one index.

    >>> df = pl.DataFrame({"a": [[1, 2, 3], [4, 5]]})
    >>> df.with_columns(shift=pl.col("a").list.shift())
    shape: (2, 2)
    ┌───────────┬──────────────┐
    │ a         ┆ shift        │
    │ ---       ┆ ---          │
    │ list[i64] ┆ list[i64]    │
    ╞═══════════╪══════════════╡
    │ [1, 2, 3] ┆ [null, 1, 2] │
    │ [4, 5]    ┆ [null, 4]    │
    └───────────┴──────────────┘

    Pass a negative value to shift in the opposite direction instead.

    >>> df.with_columns(shift=pl.col("a").list.shift(-2))
    shape: (2, 2)
    ┌───────────┬─────────────────┐
    │ a         ┆ shift           │
    │ ---       ┆ ---             │
    │ list[i64] ┆ list[i64]       │
    ╞═══════════╪═════════════════╡
    │ [1, 2, 3] ┆ [3, null, null] │
    │ [4, 5]    ┆ [null, null]    │
    └───────────┴─────────────────┘
    """
    # Convert `n` to an expression first, then hand it to the shift kernel.
    periods = parse_into_expression(n)
    shifted = self._pyexpr.list_shift(periods)
    return wrap_expr(shifted)
927
-
928
def slice(
    self, offset: int | str | Expr, length: int | str | Expr | None = None
) -> Expr:
    """
    Take a slice of each sublist.

    Parameters
    ----------
    offset
        Start index. Negative indexing is supported.
    length
        Length of the slice. If set to `None` (default), the slice is taken to the
        end of the list.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [[1, 2, 3, 4], [10, 2, 1]]})
    >>> df.with_columns(slice=pl.col("a").list.slice(1, 2))
    shape: (2, 2)
    ┌─────────────┬───────────┐
    │ a           ┆ slice     │
    │ ---         ┆ ---       │
    │ list[i64]   ┆ list[i64] │
    ╞═════════════╪═══════════╡
    │ [1, 2, … 4] ┆ [2, 3]    │
    │ [10, 2, 1]  ┆ [2, 1]    │
    └─────────────┴───────────┘
    """
    # Both bounds are converted to expressions; `length` is passed through
    # the same conversion even when it is `None`.
    start = parse_into_expression(offset)
    span = parse_into_expression(length)
    sliced = self._pyexpr.list_slice(start, span)
    return wrap_expr(sliced)
959
-
960
def head(self, n: int | str | Expr = 5) -> Expr:
    """
    Keep the first `n` values of each sublist.

    Parameters
    ----------
    n
        Number of values to return for each sublist.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [[1, 2, 3, 4], [10, 2, 1]]})
    >>> df.with_columns(head=pl.col("a").list.head(2))
    shape: (2, 2)
    ┌─────────────┬───────────┐
    │ a           ┆ head      │
    │ ---         ┆ ---       │
    │ list[i64]   ┆ list[i64] │
    ╞═════════════╪═══════════╡
    │ [1, 2, … 4] ┆ [1, 2]    │
    │ [10, 2, 1]  ┆ [10, 2]   │
    └─────────────┴───────────┘
    """
    # A head is simply a slice that starts at index 0 and spans `n` items.
    return self.slice(offset=0, length=n)
984
-
985
def tail(self, n: int | str | Expr = 5) -> Expr:
    """
    Keep the last `n` values of each sublist.

    Parameters
    ----------
    n
        Number of values to return for each sublist.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [[1, 2, 3, 4], [10, 2, 1]]})
    >>> df.with_columns(tail=pl.col("a").list.tail(2))
    shape: (2, 2)
    ┌─────────────┬───────────┐
    │ a           ┆ tail      │
    │ ---         ┆ ---       │
    │ list[i64]   ┆ list[i64] │
    ╞═════════════╪═══════════╡
    │ [1, 2, … 4] ┆ [3, 4]    │
    │ [10, 2, 1]  ┆ [2, 1]    │
    └─────────────┴───────────┘
    """
    # Convert `n` to an expression before handing it to the tail kernel.
    count = parse_into_expression(n)
    trailing = self._pyexpr.list_tail(count)
    return wrap_expr(trailing)
1010
-
1011
def explode(self) -> Expr:
    """
    Produce a column in which every list element gets its own row.

    Returns
    -------
    Expr
        Expression with the data type of the list elements.

    See Also
    --------
    Expr.reshape: Reshape this Expr to a flat Series or a Series of Lists.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [[1, 2, 3], [4, 5, 6]]})
    >>> df.select(pl.col("a").list.explode())
    shape: (6, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 1   │
    │ 2   │
    │ 3   │
    │ 4   │
    │ 5   │
    │ 6   │
    └─────┘
    """
    # Delegate to the underlying expression, then wrap the result as an Expr.
    exploded = self._pyexpr.explode()
    return wrap_expr(exploded)
1043
-
1044
def count_matches(self, element: IntoExpr) -> Expr:
    """
    Count the occurrences of the value produced by `element` in each sublist.

    Parameters
    ----------
    element
        An expression that produces a single value

    Examples
    --------
    >>> df = pl.DataFrame({"a": [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
    >>> df.with_columns(number_of_twos=pl.col("a").list.count_matches(2))
    shape: (5, 2)
    ┌─────────────┬────────────────┐
    │ a           ┆ number_of_twos │
    │ ---         ┆ ---            │
    │ list[i64]   ┆ u32            │
    ╞═════════════╪════════════════╡
    │ [0]         ┆ 0              │
    │ [1]         ┆ 0              │
    │ [1, 2, … 2] ┆ 2              │
    │ [1, 2, 1]   ┆ 1              │
    │ [4, 4]      ┆ 0              │
    └─────────────┴────────────────┘
    """
    # `element` is parsed with `str_as_lit=True`, as in the original call.
    needle = parse_into_expression(element, str_as_lit=True)
    occurrences = self._pyexpr.list_count_matches(needle)
    return wrap_expr(occurrences)
1072
-
1073
def to_array(self, width: int) -> Expr:
    """
    Turn a List column into an Array column with the same inner data type.

    Parameters
    ----------
    width
        Width of the resulting Array column.

    Returns
    -------
    Expr
        Expression of data type :class:`Array`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [3, 4]]},
    ...     schema={"a": pl.List(pl.Int8)},
    ... )
    >>> df.with_columns(array=pl.col("a").list.to_array(2))
    shape: (2, 2)
    ┌──────────┬──────────────┐
    │ a        ┆ array        │
    │ ---      ┆ ---          │
    │ list[i8] ┆ array[i8, 2] │
    ╞══════════╪══════════════╡
    │ [1, 2]   ┆ [1, 2]       │
    │ [3, 4]   ┆ [3, 4]       │
    └──────────┴──────────────┘
    """
    # `width` is forwarded unchanged to the underlying expression.
    as_array = self._pyexpr.list_to_array(width)
    return wrap_expr(as_array)
1105
-
1106
def to_struct(
    self,
    n_field_strategy: ListToStructWidthStrategy | None = None,
    fields: Sequence[str] | Callable[[int], str] | None = None,
    upper_bound: int | None = None,
) -> Expr:
    """
    Convert the Series of type `List` to a Series of type `Struct`.

    .. versionchanged:: 1.33.0
        The `n_field_strategy` parameter is ignored and deprecated. The `fields`
        needs to be a sequence of field names or the upper bound is regarded as
        ground truth.

    Parameters
    ----------
    n_field_strategy : {'first_non_null', 'max_width'}
        Deprecated and ignored. Passing any non-`None` value only emits a
        `DeprecationWarning`.
    fields
        If the name and number of the desired fields is known in advance
        a list of field names can be given, which will be assigned by index.
        Otherwise, to dynamically assign field names, a custom function can be
        used; if neither are set, fields will be `field_0, field_1 .. field_n`.
    upper_bound
        A polars expression needs to be able to evaluate the output datatype at all
        times, so the caller must provide an upper bound of the number of struct
        fields that will be created if `fields` is not a sequence of field names.

    Raises
    ------
    InvalidOperationError
        If `fields` is not a sequence and `upper_bound` is not set.

    Examples
    --------
    Convert list to struct with default field name assignment:

    >>> df = pl.DataFrame({"n": [[0, 1], [0, 1, 2]]})
    >>> df.with_columns(
    ...     struct=pl.col("n").list.to_struct(upper_bound=2)
    ... )  # doctest: +SKIP
    shape: (2, 2)
    ┌───────────┬───────────┐
    │ n         ┆ struct    │
    │ ---       ┆ ---       │
    │ list[i64] ┆ struct[2] │  # <- struct with 2 fields
    ╞═══════════╪═══════════╡
    │ [0, 1]    ┆ {0,1}     │  # OK
    │ [0, 1, 2] ┆ {0,1}     │  # NOT OK - last value missing
    └───────────┴───────────┘

    Convert list to struct with field name assignment by function/index:

    >>> df = pl.DataFrame({"n": [[0, 1], [2, 3]]})
    >>> df.select(
    ...     pl.col("n").list.to_struct(fields=lambda idx: f"n{idx}", upper_bound=2)
    ... ).rows(named=True)  # doctest: +SKIP
    [{'n': {'n0': 0, 'n1': 1}}, {'n': {'n0': 2, 'n1': 3}}]

    Convert list to struct with field name assignment by index from a list of names:

    >>> df.select(pl.col("n").list.to_struct(fields=["one", "two"])).rows(
    ...     named=True
    ... )
    [{'n': {'one': 0, 'two': 1}}, {'n': {'one': 2, 'two': 3}}]
    """
    if n_field_strategy is not None:
        issue_warning(
            "`Expr.list.to_struct` with `n_field_strategy` is deprecated and has no effect on execution.",
            DeprecationWarning,
        )

    if isinstance(fields, Sequence):
        # Explicit names: passed through as given.
        field_names = fields
    elif upper_bound is None:
        # Without explicit names the struct width must come from `upper_bound`.
        msg = "`Expr.list.to_struct` requires either `fields` to be a sequence or `upper_bound` to be set.\n\nThis used to be allowed but produced unpredictable results."
        raise exceptions.InvalidOperationError(msg)
    elif fields is None:
        # Default naming scheme: field_0, field_1, ...
        field_names = [f"field_{i}" for i in range(upper_bound)]
    else:
        # `fields` is a callable mapping an index to a field name.
        field_names = [fields(i) for i in range(upper_bound)]

    return wrap_expr(self._pyexpr.list_to_struct(field_names))
1184
-
1185
def eval(self, expr: Expr, *, parallel: bool = False) -> Expr:
    """
    Evaluate an arbitrary polars expression against the elements of each list.

    Parameters
    ----------
    expr
        Expression to run. Note that you can select an element with `pl.first()`, or
        `pl.col()`
    parallel
        Run all expressions in parallel. Don't activate this blindly.
        Parallelism is worth it if there is enough work to do per thread.

        This likely should not be used in the group by context, because execution
        is already parallelized per group.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
    >>> df.with_columns(
    ...     rank=pl.concat_list("a", "b").list.eval(pl.element().rank())
    ... )
    shape: (3, 3)
    ┌─────┬─────┬────────────┐
    │ a   ┆ b   ┆ rank       │
    │ --- ┆ --- ┆ ---        │
    │ i64 ┆ i64 ┆ list[f64]  │
    ╞═════╪═════╪════════════╡
    │ 1   ┆ 4   ┆ [1.0, 2.0] │
    │ 8   ┆ 5   ┆ [2.0, 1.0] │
    │ 3   ┆ 2   ┆ [2.0, 1.0] │
    └─────┴─────┴────────────┘
    """
    # Unwrap the user expression and forward it with the parallel flag.
    evaluate = self._pyexpr.list_eval
    evaluated = evaluate(expr._pyexpr, parallel)
    return wrap_expr(evaluated)
1219
-
1220
def filter(self, predicate: Expr) -> Expr:
    """
    Keep only the elements of each list that satisfy a boolean expression.

    Parameters
    ----------
    predicate
        A boolean expression that is evaluated per list element.
        You can refer to the current element with `pl.element()`.

    Examples
    --------
    >>> import polars as pl
    >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
    >>> df.with_columns(
    ...     evens=pl.concat_list("a", "b").list.filter(pl.element() % 2 == 0)
    ... )
    shape: (3, 3)
    ┌─────┬─────┬───────────┐
    │ a   ┆ b   ┆ evens     │
    │ --- ┆ --- ┆ ---       │
    │ i64 ┆ i64 ┆ list[i64] │
    ╞═════╪═════╪═══════════╡
    │ 1   ┆ 4   ┆ [4]       │
    │ 8   ┆ 5   ┆ [8]       │
    │ 3   ┆ 2   ┆ [2]       │
    └─────┴─────┴───────────┘
    """
    # Unwrap the predicate and forward it to the underlying expression.
    apply_filter = self._pyexpr.list_filter
    kept = apply_filter(predicate._pyexpr)
    return wrap_expr(kept)
1249
-
1250
def set_union(self, other: IntoExpr | Collection[Any]) -> Expr:
    """
    Compute the SET UNION of the elements in this list and those of `other`.

    Parameters
    ----------
    other
        Right hand side of the set operation.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2, 3], [], [None, 3], [5, 6, 7]],
    ...         "b": [[2, 3, 4], [3], [3, 4, None], [6, 8]],
    ...     }
    ... )
    >>> df.with_columns(
    ...     union=pl.col("a").list.set_union("b")
    ... )  # doctest: +IGNORE_RESULT
    shape: (4, 3)
    ┌───────────┬──────────────┬───────────────┐
    │ a         ┆ b            ┆ union         │
    │ ---       ┆ ---          ┆ ---           │
    │ list[i64] ┆ list[i64]    ┆ list[i64]     │
    ╞═══════════╪══════════════╪═══════════════╡
    │ [1, 2, 3] ┆ [2, 3, 4]    ┆ [1, 2, 3, 4]  │
    │ []        ┆ [3]          ┆ [3]           │
    │ [null, 3] ┆ [3, 4, null] ┆ [null, 3, 4]  │
    │ [5, 6, 7] ┆ [6, 8]       ┆ [5, 6, 7, 8]  │
    └───────────┴──────────────┴───────────────┘
    """
    if not isinstance(other, Collection) or isinstance(other, str):
        # Strings and non-collections go through the generic expression parser.
        rhs = parse_into_expression(other)
    else:
        if isinstance(other, (Sequence, pl.Series, pl.DataFrame)):
            literal_source = other
        else:
            literal_source = list(other)  # eg: set, frozenset, etc
        rhs = F.lit(literal_source)._pyexpr
    return wrap_expr(self._pyexpr.list_set_operation(rhs, "union"))
1289
-
1290
def set_difference(self, other: IntoExpr | Collection[Any]) -> Expr:
    """
    Compute the SET DIFFERENCE of the elements in this list and those of `other`.

    Parameters
    ----------
    other
        Right hand side of the set operation.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2, 3], [], [None, 3], [5, 6, 7]],
    ...         "b": [[2, 3, 4], [3], [3, 4, None], [6, 8]],
    ...     }
    ... )
    >>> df.with_columns(difference=pl.col("a").list.set_difference("b"))
    shape: (4, 3)
    ┌───────────┬──────────────┬────────────┐
    │ a         ┆ b            ┆ difference │
    │ ---       ┆ ---          ┆ ---        │
    │ list[i64] ┆ list[i64]    ┆ list[i64]  │
    ╞═══════════╪══════════════╪════════════╡
    │ [1, 2, 3] ┆ [2, 3, 4]    ┆ [1]        │
    │ []        ┆ [3]          ┆ []         │
    │ [null, 3] ┆ [3, 4, null] ┆ []         │
    │ [5, 6, 7] ┆ [6, 8]       ┆ [5, 7]     │
    └───────────┴──────────────┴────────────┘

    See Also
    --------
    polars.Expr.list.diff: Calculates the n-th discrete difference of every sublist.
    """
    if not isinstance(other, Collection) or isinstance(other, str):
        # Strings and non-collections go through the generic expression parser.
        rhs = parse_into_expression(other)
    else:
        if isinstance(other, (Sequence, pl.Series, pl.DataFrame)):
            literal_source = other
        else:
            literal_source = list(other)  # eg: set, frozenset, etc
        rhs = F.lit(literal_source)._pyexpr
    return wrap_expr(self._pyexpr.list_set_operation(rhs, "difference"))
1331
-
1332
def set_intersection(self, other: IntoExpr | Collection[Any]) -> Expr:
    """
    Compute the SET INTERSECTION of the elements in this list and those of `other`.

    Parameters
    ----------
    other
        Right hand side of the set operation.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2, 3], [], [None, 3], [5, 6, 7]],
    ...         "b": [[2, 3, 4], [3], [3, 4, None], [6, 8]],
    ...     }
    ... )
    >>> df.with_columns(intersection=pl.col("a").list.set_intersection("b"))
    shape: (4, 3)
    ┌───────────┬──────────────┬──────────────┐
    │ a         ┆ b            ┆ intersection │
    │ ---       ┆ ---          ┆ ---          │
    │ list[i64] ┆ list[i64]    ┆ list[i64]    │
    ╞═══════════╪══════════════╪══════════════╡
    │ [1, 2, 3] ┆ [2, 3, 4]    ┆ [2, 3]       │
    │ []        ┆ [3]          ┆ []           │
    │ [null, 3] ┆ [3, 4, null] ┆ [null, 3]    │
    │ [5, 6, 7] ┆ [6, 8]       ┆ [6]          │
    └───────────┴──────────────┴──────────────┘
    """
    if not isinstance(other, Collection) or isinstance(other, str):
        # Strings and non-collections go through the generic expression parser.
        rhs = parse_into_expression(other)
    else:
        if isinstance(other, (Sequence, pl.Series, pl.DataFrame)):
            literal_source = other
        else:
            literal_source = list(other)  # eg: set, frozenset, etc
        rhs = F.lit(literal_source)._pyexpr
    return wrap_expr(self._pyexpr.list_set_operation(rhs, "intersection"))
1369
-
1370
def set_symmetric_difference(self, other: IntoExpr | Collection[Any]) -> Expr:
    """
    Compute the SET SYMMETRIC DIFFERENCE of this list's elements and `other`'s.

    Parameters
    ----------
    other
        Right hand side of the set operation.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2, 3], [], [None, 3], [5, 6, 7]],
    ...         "b": [[2, 3, 4], [3], [3, 4, None], [6, 8]],
    ...     }
    ... )
    >>> df.with_columns(sdiff=pl.col("b").list.set_symmetric_difference("a"))
    shape: (4, 3)
    ┌───────────┬──────────────┬───────────┐
    │ a         ┆ b            ┆ sdiff     │
    │ ---       ┆ ---          ┆ ---       │
    │ list[i64] ┆ list[i64]    ┆ list[i64] │
    ╞═══════════╪══════════════╪═══════════╡
    │ [1, 2, 3] ┆ [2, 3, 4]    ┆ [4, 1]    │
    │ []        ┆ [3]          ┆ [3]       │
    │ [null, 3] ┆ [3, 4, null] ┆ [4]       │
    │ [5, 6, 7] ┆ [6, 8]       ┆ [8, 5, 7] │
    └───────────┴──────────────┴───────────┘
    """
    if not isinstance(other, Collection) or isinstance(other, str):
        # Strings and non-collections go through the generic expression parser.
        rhs = parse_into_expression(other)
    else:
        if isinstance(other, (Sequence, pl.Series, pl.DataFrame)):
            literal_source = other
        else:
            literal_source = list(other)  # eg: set, frozenset, etc
        rhs = F.lit(literal_source)._pyexpr
    combined = self._pyexpr.list_set_operation(rhs, "symmetric_difference")
    return wrap_expr(combined)