polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/expr/array.py ADDED
@@ -0,0 +1,964 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from typing import TYPE_CHECKING, Callable
5
+
6
+ from polars._utils.parse import parse_into_expression
7
+ from polars._utils.wrap import wrap_expr
8
+
9
+ if TYPE_CHECKING:
10
+ from polars import Expr
11
+ from polars._typing import IntoExpr, IntoExprColumn
12
+
13
+
14
+ class ExprArrayNameSpace:
15
+ """Namespace for array related expressions."""
16
+
17
+ _accessor = "arr"
18
+
19
def __init__(self, expr: Expr) -> None:
    # Keep only the low-level expression handle of `expr`; the namespace
    # methods issue their `arr_*` calls through this attribute.
    pyexpr = expr._pyexpr
    self._pyexpr = pyexpr
21
+
22
def len(self) -> Expr:
    """
    Count the elements of each array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.len())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ u32 │
    ╞═════╡
    │ 2   │
    │ 2   │
    └─────┘
    """
    lengths = self._pyexpr.arr_len()
    return wrap_expr(lengths)
44
+
45
def slice(
    self,
    offset: int | str | Expr,
    length: int | str | Expr | None = None,
    *,
    as_array: bool = False,
) -> Expr:
    """
    Take a slice out of every sub-array.

    Parameters
    ----------
    offset
        Start index. Negative indexing is supported.
    length
        Length of the slice. If set to `None` (default), the slice runs to the
        end of the array.
    as_array
        Return result as a fixed-length `Array`, otherwise as a `List`.
        If true `length` and `offset` must be constant values.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.slice(0, 1))
    shape: (2, 1)
    ┌───────────┐
    │ a         │
    │ ---       │
    │ list[i64] │
    ╞═══════════╡
    │ [1]       │
    │ [4]       │
    └───────────┘
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.slice(0, 1, as_array=True))
    shape: (2, 1)
    ┌───────────────┐
    │ a             │
    │ ---           │
    │ array[i64, 1] │
    ╞═══════════════╡
    │ [1]           │
    │ [4]           │
    └───────────────┘
    """
    offset_pyexpr = parse_into_expression(offset)
    # A missing length is forwarded as `None` rather than parsed.
    if length is None:
        length_pyexpr = None
    else:
        length_pyexpr = parse_into_expression(length)
    sliced = self._pyexpr.arr_slice(offset_pyexpr, length_pyexpr, as_array)
    return wrap_expr(sliced)
100
+
101
def head(self, n: int | str | Expr = 5, *, as_array: bool = False) -> Expr:
    """
    Take the first `n` elements of every sub-array.

    Parameters
    ----------
    n
        Number of values to return for each sub-array.
    as_array
        Return result as a fixed-length `Array`, otherwise as a `List`.
        If true `n` must be a constant value.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.head(1))
    shape: (2, 1)
    ┌───────────┐
    │ a         │
    │ ---       │
    │ list[i64] │
    ╞═══════════╡
    │ [1]       │
    │ [4]       │
    └───────────┘
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.head(1, as_array=True))
    shape: (2, 1)
    ┌───────────────┐
    │ a             │
    │ ---           │
    │ array[i64, 1] │
    ╞═══════════════╡
    │ [1]           │
    │ [4]           │
    └───────────────┘
    """
    # The head is the slice of length `n` that starts at offset 0.
    start_offset = 0
    return self.slice(start_offset, n, as_array=as_array)
145
+
146
def tail(self, n: int | str | Expr = 5, *, as_array: bool = False) -> Expr:
    """
    Take the last `n` elements of every sub-array.

    Parameters
    ----------
    n
        Number of values to return for each sub-array.
    as_array
        Return result as a fixed-length `Array`, otherwise as a `List`.
        If true `n` must be a constant value.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.tail(1))
    shape: (2, 1)
    ┌───────────┐
    │ a         │
    │ ---       │
    │ list[i64] │
    ╞═══════════╡
    │ [2]       │
    │ [3]       │
    └───────────┘
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.tail(1, as_array=True))
    shape: (2, 1)
    ┌───────────────┐
    │ a             │
    │ ---           │
    │ array[i64, 1] │
    ╞═══════════════╡
    │ [2]           │
    │ [3]           │
    └───────────────┘
    """
    count_pyexpr = parse_into_expression(n)
    tail_pyexpr = self._pyexpr.arr_tail(count_pyexpr, as_array)
    return wrap_expr(tail_pyexpr)
191
+
192
def min(self) -> Expr:
    """
    Compute the minimum value of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.min())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 1   │
    │ 3   │
    └─────┘
    """
    minimum = self._pyexpr.arr_min()
    return wrap_expr(minimum)
214
+
215
def max(self) -> Expr:
    """
    Compute the maximum value of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.max())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 2   │
    │ 4   │
    └─────┘
    """
    maximum = self._pyexpr.arr_max()
    return wrap_expr(maximum)
237
+
238
def sum(self) -> Expr:
    """
    Compute the sum of the values of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.sum())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    │ 7   │
    └─────┘
    """
    total = self._pyexpr.arr_sum()
    return wrap_expr(total)
260
+
261
def std(self, ddof: int = 1) -> Expr:
    """
    Compute the standard deviation of the values of every sub-array.

    Parameters
    ----------
    ddof
        "Delta Degrees of Freedom" forwarded to the computation. With the
        default of 1 the example below yields the sample standard deviation
        (divisor `N - 1`).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.std())
    shape: (2, 1)
    ┌──────────┐
    │ a        │
    │ ---      │
    │ f64      │
    ╞══════════╡
    │ 0.707107 │
    │ 0.707107 │
    └──────────┘
    """
    deviation = self._pyexpr.arr_std(ddof)
    return wrap_expr(deviation)
283
+
284
def var(self, ddof: int = 1) -> Expr:
    """
    Compute the variance of the values of every sub-array.

    Parameters
    ----------
    ddof
        "Delta Degrees of Freedom" forwarded to the computation. With the
        default of 1 the example below yields the sample variance
        (divisor `N - 1`).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.var())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ f64 │
    ╞═════╡
    │ 0.5 │
    │ 0.5 │
    └─────┘
    """
    variance = self._pyexpr.arr_var(ddof)
    return wrap_expr(variance)
306
+
307
def mean(self) -> Expr:
    """
    Compute the mean of the values of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2, 3], [1, 1, 16]]},
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.select(pl.col("a").arr.mean())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ f64 │
    ╞═════╡
    │ 2.0 │
    │ 6.0 │
    └─────┘
    """
    average = self._pyexpr.arr_mean()
    return wrap_expr(average)
329
+
330
def median(self) -> Expr:
    """
    Compute the median of the values of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [4, 3]]},
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.select(pl.col("a").arr.median())
    shape: (2, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ f64 │
    ╞═════╡
    │ 1.5 │
    │ 3.5 │
    └─────┘
    """
    middle = self._pyexpr.arr_median()
    return wrap_expr(middle)
352
+
353
def unique(self, *, maintain_order: bool = False) -> Expr:
    """
    Get the unique/distinct values of every sub-array.

    Parameters
    ----------
    maintain_order
        Maintain order of data. This requires more work.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 1, 2]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.select(pl.col("a").arr.unique())
    shape: (1, 1)
    ┌───────────┐
    │ a         │
    │ ---       │
    │ list[i64] │
    ╞═══════════╡
    │ [1, 2]    │
    └───────────┘
    """
    distinct = self._pyexpr.arr_unique(maintain_order)
    return wrap_expr(distinct)
381
+
382
def n_unique(self) -> Expr:
    """
    Count the number of unique values in every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 1, 2], [2, 3, 4]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.with_columns(n_unique=pl.col("a").arr.n_unique())
    shape: (2, 2)
    ┌───────────────┬──────────┐
    │ a             ┆ n_unique │
    │ ---           ┆ ---      │
    │ array[i64, 3] ┆ u32      │
    ╞═══════════════╪══════════╡
    │ [1, 1, 2]     ┆ 2        │
    │ [2, 3, 4]     ┆ 3        │
    └───────────────┴──────────┘
    """
    distinct_count = self._pyexpr.arr_n_unique()
    return wrap_expr(distinct_count)
406
+
407
def to_list(self) -> Expr:
    """
    Turn an Array column into a List column, keeping the inner data type.

    Returns
    -------
    Expr
        Expression of data type :class:`List`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={"a": [[1, 2], [3, 4]]},
    ...     schema={"a": pl.Array(pl.Int8, 2)},
    ... )
    >>> df.select(pl.col("a").arr.to_list())
    shape: (2, 1)
    ┌──────────┐
    │ a        │
    │ ---      │
    │ list[i8] │
    ╞══════════╡
    │ [1, 2]   │
    │ [3, 4]   │
    └──────────┘
    """
    as_list = self._pyexpr.arr_to_list()
    return wrap_expr(as_list)
434
+
435
def any(self) -> Expr:
    """
    Check, per sub-array, whether at least one boolean value is true.

    As the example shows, a sub-array holding only nulls yields `false`,
    while a null sub-array yields null.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={
    ...         "a": [
    ...             [True, True],
    ...             [False, True],
    ...             [False, False],
    ...             [None, None],
    ...             None,
    ...         ]
    ...     },
    ...     schema={"a": pl.Array(pl.Boolean, 2)},
    ... )
    >>> df.with_columns(any=pl.col("a").arr.any())
    shape: (5, 2)
    ┌────────────────┬───────┐
    │ a              ┆ any   │
    │ ---            ┆ ---   │
    │ array[bool, 2] ┆ bool  │
    ╞════════════════╪═══════╡
    │ [true, true]   ┆ true  │
    │ [false, true]  ┆ true  │
    │ [false, false] ┆ false │
    │ [null, null]   ┆ false │
    │ null           ┆ null  │
    └────────────────┴───────┘
    """
    any_true = self._pyexpr.arr_any()
    return wrap_expr(any_true)
468
+
469
def all(self) -> Expr:
    """
    Check, per sub-array, whether every boolean value is true.

    As the example shows, a sub-array holding only nulls yields `true`,
    while a null sub-array yields null.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     data={
    ...         "a": [
    ...             [True, True],
    ...             [False, True],
    ...             [False, False],
    ...             [None, None],
    ...             None,
    ...         ]
    ...     },
    ...     schema={"a": pl.Array(pl.Boolean, 2)},
    ... )
    >>> df.with_columns(all=pl.col("a").arr.all())
    shape: (5, 2)
    ┌────────────────┬───────┐
    │ a              ┆ all   │
    │ ---            ┆ ---   │
    │ array[bool, 2] ┆ bool  │
    ╞════════════════╪═══════╡
    │ [true, true]   ┆ true  │
    │ [false, true]  ┆ false │
    │ [false, false] ┆ false │
    │ [null, null]   ┆ true  │
    │ null           ┆ null  │
    └────────────────┴───────┘
    """
    all_true = self._pyexpr.arr_all()
    return wrap_expr(all_true)
502
+
503
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
    """
    Sort the values inside every sub-array.

    Parameters
    ----------
    descending
        Sort in descending order.
    nulls_last
        Place null values last.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[3, 2, 1], [9, 1, 2]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.with_columns(sort=pl.col("a").arr.sort())
    shape: (2, 2)
    ┌───────────────┬───────────────┐
    │ a             ┆ sort          │
    │ ---           ┆ ---           │
    │ array[i64, 3] ┆ array[i64, 3] │
    ╞═══════════════╪═══════════════╡
    │ [3, 2, 1]     ┆ [1, 2, 3]     │
    │ [9, 1, 2]     ┆ [1, 2, 9]     │
    └───────────────┴───────────────┘
    >>> df.with_columns(sort=pl.col("a").arr.sort(descending=True))
    shape: (2, 2)
    ┌───────────────┬───────────────┐
    │ a             ┆ sort          │
    │ ---           ┆ ---           │
    │ array[i64, 3] ┆ array[i64, 3] │
    ╞═══════════════╪═══════════════╡
    │ [3, 2, 1]     ┆ [3, 2, 1]     │
    │ [9, 1, 2]     ┆ [9, 2, 1]     │
    └───────────────┴───────────────┘
    """
    sorted_pyexpr = self._pyexpr.arr_sort(descending, nulls_last)
    return wrap_expr(sorted_pyexpr)
544
+
545
def reverse(self) -> Expr:
    """
    Reverse the element order of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[3, 2, 1], [9, 1, 2]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 3)},
    ... )
    >>> df.with_columns(reverse=pl.col("a").arr.reverse())
    shape: (2, 2)
    ┌───────────────┬───────────────┐
    │ a             ┆ reverse       │
    │ ---           ┆ ---           │
    │ array[i64, 3] ┆ array[i64, 3] │
    ╞═══════════════╪═══════════════╡
    │ [3, 2, 1]     ┆ [1, 2, 3]     │
    │ [9, 1, 2]     ┆ [2, 1, 9]     │
    └───────────────┴───────────────┘
    """
    reversed_pyexpr = self._pyexpr.arr_reverse()
    return wrap_expr(reversed_pyexpr)
569
+
570
def arg_min(self) -> Expr:
    """
    Find the index of the minimal value in every sub-array.

    Returns
    -------
    Expr
        Expression of data type :class:`UInt32` or :class:`UInt64`
        (depending on compilation).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2], [2, 1]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.with_columns(arg_min=pl.col("a").arr.arg_min())
    shape: (2, 2)
    ┌───────────────┬─────────┐
    │ a             ┆ arg_min │
    │ ---           ┆ ---     │
    │ array[i64, 2] ┆ u32     │
    ╞═══════════════╪═════════╡
    │ [1, 2]        ┆ 0       │
    │ [2, 1]        ┆ 1       │
    └───────────────┴─────────┘
    """
    min_index = self._pyexpr.arr_arg_min()
    return wrap_expr(min_index)
600
+
601
def arg_max(self) -> Expr:
    """
    Find the index of the maximum value in every sub-array.

    Returns
    -------
    Expr
        Expression of data type :class:`UInt32` or :class:`UInt64`
        (depending on compilation).

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [[1, 2], [2, 1]],
    ...     },
    ...     schema={"a": pl.Array(pl.Int64, 2)},
    ... )
    >>> df.with_columns(arg_max=pl.col("a").arr.arg_max())
    shape: (2, 2)
    ┌───────────────┬─────────┐
    │ a             ┆ arg_max │
    │ ---           ┆ ---     │
    │ array[i64, 2] ┆ u32     │
    ╞═══════════════╪═════════╡
    │ [1, 2]        ┆ 1       │
    │ [2, 1]        ┆ 0       │
    └───────────────┴─────────┘
    """
    max_index = self._pyexpr.arr_arg_max()
    return wrap_expr(max_index)
631
+
632
def get(self, index: int | IntoExprColumn, *, null_on_oob: bool = False) -> Expr:
    """
    Get the value by index in the sub-arrays.

    Index `0` returns the first item of every sub-array and index `-1`
    returns the last item. What happens when an index is out of bounds is
    controlled by `null_on_oob`; with the default (`False`) an error is
    raised rather than a null being returned.

    Parameters
    ----------
    index
        Index to return per sub-array. A string is interpreted as a column
        name (see the example below).
    null_on_oob
        Behavior if an index is out of bounds:

        * True -> set as null
        * False -> raise an error (default)

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"arr": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx": [1, -2, 0]},
    ...     schema={"arr": pl.Array(pl.Int32, 3), "idx": pl.Int32},
    ... )
    >>> df.with_columns(get=pl.col("arr").arr.get("idx", null_on_oob=True))
    shape: (3, 3)
    ┌───────────────┬─────┬─────┐
    │ arr           ┆ idx ┆ get │
    │ ---           ┆ --- ┆ --- │
    │ array[i32, 3] ┆ i32 ┆ i32 │
    ╞═══════════════╪═════╪═════╡
    │ [1, 2, 3]     ┆ 1   ┆ 2   │
    │ [4, 5, 6]     ┆ -2  ┆ 5   │
    │ [7, 8, 9]     ┆ 0   ┆ 7   │
    └───────────────┴─────┴─────┘
    """
    index_pyexpr = parse_into_expression(index)
    picked = self._pyexpr.arr_get(index_pyexpr, null_on_oob)
    return wrap_expr(picked)
669
+
670
def first(self) -> Expr:
    """
    Get the first value of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
    ...     schema={"a": pl.Array(pl.Int32, 3)},
    ... )
    >>> df.with_columns(first=pl.col("a").arr.first())
    shape: (3, 2)
    ┌───────────────┬───────┐
    │ a             ┆ first │
    │ ---           ┆ ---   │
    │ array[i32, 3] ┆ i32   │
    ╞═══════════════╪═══════╡
    │ [1, 2, 3]     ┆ 1     │
    │ [4, 5, 6]     ┆ 4     │
    │ [7, 8, 9]     ┆ 7     │
    └───────────────┴───────┘
    """
    # Delegates to `get` at index 0, requesting null for out-of-bounds.
    first_index = 0
    return self.get(first_index, null_on_oob=True)
693
+
694
def last(self) -> Expr:
    """
    Get the last value of every sub-array.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6], [7, 9, 8]]},
    ...     schema={"a": pl.Array(pl.Int32, 3)},
    ... )
    >>> df.with_columns(last=pl.col("a").arr.last())
    shape: (3, 2)
    ┌───────────────┬──────┐
    │ a             ┆ last │
    │ ---           ┆ ---  │
    │ array[i32, 3] ┆ i32  │
    ╞═══════════════╪══════╡
    │ [1, 2, 3]     ┆ 3    │
    │ [4, 5, 6]     ┆ 6    │
    │ [7, 9, 8]     ┆ 8    │
    └───────────────┴──────┘
    """
    # Delegates to `get` at index -1, requesting null for out-of-bounds.
    last_index = -1
    return self.get(last_index, null_on_oob=True)
717
+
718
def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Expr:
    """
    Concatenate the string items of every sub-array, separated by `separator`.

    This errors if inner type of array `!= String`.

    Parameters
    ----------
    separator
        String to separate the items with.
    ignore_nulls
        Ignore null values (default).

        If set to ``False``, null values will be propagated.
        If the sub-array contains any null values, the output is ``None``.

    Returns
    -------
    Expr
        Expression of data type :class:`String`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"s": [["a", "b"], ["x", "y"]], "separator": ["*", "_"]},
    ...     schema={
    ...         "s": pl.Array(pl.String, 2),
    ...         "separator": pl.String,
    ...     },
    ... )
    >>> df.with_columns(join=pl.col("s").arr.join(pl.col("separator")))
    shape: (2, 3)
    ┌───────────────┬───────────┬──────┐
    │ s             ┆ separator ┆ join │
    │ ---           ┆ ---       ┆ ---  │
    │ array[str, 2] ┆ str       ┆ str  │
    ╞═══════════════╪═══════════╪══════╡
    │ ["a", "b"]    ┆ *         ┆ a*b  │
    │ ["x", "y"]    ┆ _         ┆ x_y  │
    └───────────────┴───────────┴──────┘
    """
    sep_pyexpr = parse_into_expression(separator, str_as_lit=True)
    joined = self._pyexpr.arr_join(sep_pyexpr, ignore_nulls)
    return wrap_expr(joined)
761
+
762
def explode(self) -> Expr:
    """
    Produce a column with one row per array element.

    Returns
    -------
    Expr
        Expression with the data type of the array elements.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6]]}, schema={"a": pl.Array(pl.Int64, 3)}
    ... )
    >>> df.select(pl.col("a").arr.explode())
    shape: (6, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 1   │
    │ 2   │
    │ 3   │
    │ 4   │
    │ 5   │
    │ 6   │
    └─────┘
    """
    exploded = self._pyexpr.arr_explode()
    return wrap_expr(exploded)
792
+
793
def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr:
    """
    Test whether every sub-array contains the given item.

    Parameters
    ----------
    item
        Item that will be checked for membership. A plain string is used as
        a literal value (see the example below).
    nulls_equal : bool, default True
        If True, treat null as a distinct value. Null values will not propagate.

    Returns
    -------
    Expr
        Expression of data type :class:`Boolean`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [["a", "b"], ["x", "y"], ["a", "c"]]},
    ...     schema={"a": pl.Array(pl.String, 2)},
    ... )
    >>> df.with_columns(contains=pl.col("a").arr.contains("a"))
    shape: (3, 2)
    ┌───────────────┬──────────┐
    │ a             ┆ contains │
    │ ---           ┆ ---      │
    │ array[str, 2] ┆ bool     │
    ╞═══════════════╪══════════╡
    │ ["a", "b"]    ┆ true     │
    │ ["x", "y"]    ┆ false    │
    │ ["a", "c"]    ┆ true     │
    └───────────────┴──────────┘
    """
    needle_pyexpr = parse_into_expression(item, str_as_lit=True)
    membership = self._pyexpr.arr_contains(needle_pyexpr, nulls_equal)
    return wrap_expr(membership)
829
+
830
def count_matches(self, element: IntoExpr) -> Expr:
    """
    Count the occurrences of the value produced by `element` in every sub-array.

    Parameters
    ----------
    element
        An expression that produces a single value

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2], [1, 1], [2, 2]]}, schema={"a": pl.Array(pl.Int64, 2)}
    ... )
    >>> df.with_columns(number_of_twos=pl.col("a").arr.count_matches(2))
    shape: (3, 2)
    ┌───────────────┬────────────────┐
    │ a             ┆ number_of_twos │
    │ ---           ┆ ---            │
    │ array[i64, 2] ┆ u32            │
    ╞═══════════════╪════════════════╡
    │ [1, 2]        ┆ 1              │
    │ [1, 1]        ┆ 0              │
    │ [2, 2]        ┆ 2              │
    └───────────────┴────────────────┘
    """
    target_pyexpr = parse_into_expression(element, str_as_lit=True)
    match_count = self._pyexpr.arr_count_matches(target_pyexpr)
    return wrap_expr(match_count)
858
+
859
def to_struct(
    self, fields: Sequence[str] | Callable[[int], str] | None = None
) -> Expr:
    """
    Convert an `Array` column into a `Struct` column.

    Parameters
    ----------
    fields
        If the name and number of the desired fields is known in advance
        a list of field names can be given, which will be assigned by index.
        Otherwise, to dynamically assign field names, a custom function can be
        used; if neither are set, fields will be `field_0, field_1 .. field_n`.

    Examples
    --------
    Convert array to struct with default field name assignment:

    >>> df = pl.DataFrame(
    ...     {"n": [[0, 1, 2], [3, 4, 5]]}, schema={"n": pl.Array(pl.Int8, 3)}
    ... )
    >>> df.with_columns(struct=pl.col("n").arr.to_struct())
    shape: (2, 2)
    ┌──────────────┬───────────┐
    │ n            ┆ struct    │
    │ ---          ┆ ---       │
    │ array[i8, 3] ┆ struct[3] │
    ╞══════════════╪═══════════╡
    │ [0, 1, 2]    ┆ {0,1,2}   │
    │ [3, 4, 5]    ┆ {3,4,5}   │
    └──────────────┴───────────┘

    Convert array to struct with field name assignment by function/index:

    >>> df = pl.DataFrame(
    ...     {"n": [[0, 1, 2], [3, 4, 5]]}, schema={"n": pl.Array(pl.Int8, 3)}
    ... )
    >>> df.select(pl.col("n").arr.to_struct(fields=lambda idx: f"n{idx}")).rows(
    ...     named=True
    ... )
    [{'n': {'n0': 0, 'n1': 1, 'n2': 2}}, {'n': {'n0': 3, 'n1': 4, 'n2': 5}}]

    Convert array to struct with field name assignment by
    index from a list of names:

    >>> df.select(pl.col("n").arr.to_struct(fields=["c1", "c2", "c3"])).rows(
    ...     named=True
    ... )
    [{'n': {'c1': 0, 'c2': 1, 'c3': 2}}, {'n': {'c1': 3, 'c2': 4, 'c3': 5}}]
    """
    # A callable (or None) is handed straight to the conversion.
    if not isinstance(fields, Sequence):
        return wrap_expr(self._pyexpr.arr_to_struct(fields))

    # An explicit sequence of names: convert with the default naming first,
    # then rename the resulting struct fields by position.
    names = list(fields)
    struct_expr = wrap_expr(self._pyexpr.arr_to_struct(None))
    return struct_expr.struct.rename_fields(names)
916
+
917
def shift(self, n: int | IntoExprColumn = 1) -> Expr:
    """
    Shift the values of every sub-array by the given number of indices.

    Parameters
    ----------
    n
        Number of indices to shift forward. If a negative value is passed, values
        are shifted in the opposite direction instead.

    Notes
    -----
    This method is similar to the `LAG` operation in SQL when the value for `n`
    is positive. With a negative value for `n`, it is similar to `LEAD`.

    Examples
    --------
    By default, array values are shifted forward by one index.

    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6]]}, schema={"a": pl.Array(pl.Int64, 3)}
    ... )
    >>> df.with_columns(shift=pl.col("a").arr.shift())
    shape: (2, 2)
    ┌───────────────┬───────────────┐
    │ a             ┆ shift         │
    │ ---           ┆ ---           │
    │ array[i64, 3] ┆ array[i64, 3] │
    ╞═══════════════╪═══════════════╡
    │ [1, 2, 3]     ┆ [null, 1, 2]  │
    │ [4, 5, 6]     ┆ [null, 4, 5]  │
    └───────────────┴───────────────┘

    Pass a negative value to shift in the opposite direction instead.

    >>> df.with_columns(shift=pl.col("a").arr.shift(-2))
    shape: (2, 2)
    ┌───────────────┬─────────────────┐
    │ a             ┆ shift           │
    │ ---           ┆ ---             │
    │ array[i64, 3] ┆ array[i64, 3]   │
    ╞═══════════════╪═════════════════╡
    │ [1, 2, 3]     ┆ [3, null, null] │
    │ [4, 5, 6]     ┆ [6, null, null] │
    └───────────────┴─────────────────┘
    """
    steps_pyexpr = parse_into_expression(n)
    shifted = self._pyexpr.arr_shift(steps_pyexpr)
    return wrap_expr(shifted)