polars-runtime-compat 1.34.0b3__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b5__cp39-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/expr/array.py
DELETED
|
@@ -1,964 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from collections.abc import Sequence
|
|
4
|
-
from typing import TYPE_CHECKING, Callable
|
|
5
|
-
|
|
6
|
-
from polars._utils.parse import parse_into_expression
|
|
7
|
-
from polars._utils.wrap import wrap_expr
|
|
8
|
-
|
|
9
|
-
if TYPE_CHECKING:
|
|
10
|
-
from polars import Expr
|
|
11
|
-
from polars._typing import IntoExpr, IntoExprColumn
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class ExprArrayNameSpace:
|
|
15
|
-
"""Namespace for array related expressions."""
|
|
16
|
-
|
|
17
|
-
_accessor = "arr"
|
|
18
|
-
|
|
19
|
-
def __init__(self, expr: Expr) -> None:
|
|
20
|
-
self._pyexpr = expr._pyexpr
|
|
21
|
-
|
|
22
|
-
def len(self) -> Expr:
|
|
23
|
-
"""
|
|
24
|
-
Return the number of elements in each array.
|
|
25
|
-
|
|
26
|
-
Examples
|
|
27
|
-
--------
|
|
28
|
-
>>> df = pl.DataFrame(
|
|
29
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
30
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
31
|
-
... )
|
|
32
|
-
>>> df.select(pl.col("a").arr.len())
|
|
33
|
-
shape: (2, 1)
|
|
34
|
-
┌─────┐
|
|
35
|
-
│ a │
|
|
36
|
-
│ --- │
|
|
37
|
-
│ u32 │
|
|
38
|
-
╞═════╡
|
|
39
|
-
│ 2 │
|
|
40
|
-
│ 2 │
|
|
41
|
-
└─────┘
|
|
42
|
-
"""
|
|
43
|
-
return wrap_expr(self._pyexpr.arr_len())
|
|
44
|
-
|
|
45
|
-
def slice(
|
|
46
|
-
self,
|
|
47
|
-
offset: int | str | Expr,
|
|
48
|
-
length: int | str | Expr | None = None,
|
|
49
|
-
*,
|
|
50
|
-
as_array: bool = False,
|
|
51
|
-
) -> Expr:
|
|
52
|
-
"""
|
|
53
|
-
Slice every subarray.
|
|
54
|
-
|
|
55
|
-
Parameters
|
|
56
|
-
----------
|
|
57
|
-
offset
|
|
58
|
-
Start index. Negative indexing is supported.
|
|
59
|
-
length
|
|
60
|
-
Length of the slice. If set to `None` (default), the slice is taken to the
|
|
61
|
-
end of the list.
|
|
62
|
-
as_array
|
|
63
|
-
Return result as a fixed-length `Array`, otherwise as a `List`.
|
|
64
|
-
If true `length` and `offset` must be constant values.
|
|
65
|
-
|
|
66
|
-
Examples
|
|
67
|
-
--------
|
|
68
|
-
>>> df = pl.DataFrame(
|
|
69
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
70
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
71
|
-
... )
|
|
72
|
-
>>> df.select(pl.col("a").arr.slice(0, 1))
|
|
73
|
-
shape: (2, 1)
|
|
74
|
-
┌───────────┐
|
|
75
|
-
│ a │
|
|
76
|
-
│ --- │
|
|
77
|
-
│ list[i64] │
|
|
78
|
-
╞═══════════╡
|
|
79
|
-
│ [1] │
|
|
80
|
-
│ [4] │
|
|
81
|
-
└───────────┘
|
|
82
|
-
>>> df = pl.DataFrame(
|
|
83
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
84
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
85
|
-
... )
|
|
86
|
-
>>> df.select(pl.col("a").arr.slice(0, 1, as_array=True))
|
|
87
|
-
shape: (2, 1)
|
|
88
|
-
┌───────────────┐
|
|
89
|
-
│ a │
|
|
90
|
-
│ --- │
|
|
91
|
-
│ array[i64, 1] │
|
|
92
|
-
╞═══════════════╡
|
|
93
|
-
│ [1] │
|
|
94
|
-
│ [4] │
|
|
95
|
-
└───────────────┘
|
|
96
|
-
"""
|
|
97
|
-
offset_pyexpr = parse_into_expression(offset)
|
|
98
|
-
length_pyexpr = parse_into_expression(length) if length is not None else None
|
|
99
|
-
return wrap_expr(self._pyexpr.arr_slice(offset_pyexpr, length_pyexpr, as_array))
|
|
100
|
-
|
|
101
|
-
def head(self, n: int | str | Expr = 5, *, as_array: bool = False) -> Expr:
|
|
102
|
-
"""
|
|
103
|
-
Get the first `n` elements of the sub-arrays.
|
|
104
|
-
|
|
105
|
-
Parameters
|
|
106
|
-
----------
|
|
107
|
-
n
|
|
108
|
-
Number of values to return for each sublist.
|
|
109
|
-
as_array
|
|
110
|
-
Return result as a fixed-length `Array`, otherwise as a `List`.
|
|
111
|
-
If true `n` must be a constant value.
|
|
112
|
-
|
|
113
|
-
Examples
|
|
114
|
-
--------
|
|
115
|
-
>>> df = pl.DataFrame(
|
|
116
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
117
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
118
|
-
... )
|
|
119
|
-
>>> df.select(pl.col("a").arr.head(1))
|
|
120
|
-
shape: (2, 1)
|
|
121
|
-
┌───────────┐
|
|
122
|
-
│ a │
|
|
123
|
-
│ --- │
|
|
124
|
-
│ list[i64] │
|
|
125
|
-
╞═══════════╡
|
|
126
|
-
│ [1] │
|
|
127
|
-
│ [4] │
|
|
128
|
-
└───────────┘
|
|
129
|
-
>>> df = pl.DataFrame(
|
|
130
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
131
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
132
|
-
... )
|
|
133
|
-
>>> df.select(pl.col("a").arr.head(1, as_array=True))
|
|
134
|
-
shape: (2, 1)
|
|
135
|
-
┌───────────────┐
|
|
136
|
-
│ a │
|
|
137
|
-
│ --- │
|
|
138
|
-
│ array[i64, 1] │
|
|
139
|
-
╞═══════════════╡
|
|
140
|
-
│ [1] │
|
|
141
|
-
│ [4] │
|
|
142
|
-
└───────────────┘
|
|
143
|
-
"""
|
|
144
|
-
return self.slice(0, n, as_array=as_array)
|
|
145
|
-
|
|
146
|
-
def tail(self, n: int | str | Expr = 5, *, as_array: bool = False) -> Expr:
|
|
147
|
-
"""
|
|
148
|
-
Slice the last `n` values of every sublist.
|
|
149
|
-
|
|
150
|
-
Parameters
|
|
151
|
-
----------
|
|
152
|
-
n
|
|
153
|
-
Number of values to return for each sublist.
|
|
154
|
-
as_array
|
|
155
|
-
Return result as a fixed-length `Array`, otherwise as a `List`.
|
|
156
|
-
If true `n` must be a constant value.
|
|
157
|
-
|
|
158
|
-
Examples
|
|
159
|
-
--------
|
|
160
|
-
>>> df = pl.DataFrame(
|
|
161
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
162
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
163
|
-
... )
|
|
164
|
-
>>> df.select(pl.col("a").arr.tail(1))
|
|
165
|
-
shape: (2, 1)
|
|
166
|
-
┌───────────┐
|
|
167
|
-
│ a │
|
|
168
|
-
│ --- │
|
|
169
|
-
│ list[i64] │
|
|
170
|
-
╞═══════════╡
|
|
171
|
-
│ [2] │
|
|
172
|
-
│ [3] │
|
|
173
|
-
└───────────┘
|
|
174
|
-
>>> df = pl.DataFrame(
|
|
175
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
176
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
177
|
-
... )
|
|
178
|
-
>>> df.select(pl.col("a").arr.tail(1, as_array=True))
|
|
179
|
-
shape: (2, 1)
|
|
180
|
-
┌───────────────┐
|
|
181
|
-
│ a │
|
|
182
|
-
│ --- │
|
|
183
|
-
│ array[i64, 1] │
|
|
184
|
-
╞═══════════════╡
|
|
185
|
-
│ [2] │
|
|
186
|
-
│ [3] │
|
|
187
|
-
└───────────────┘
|
|
188
|
-
"""
|
|
189
|
-
n_pyexpr = parse_into_expression(n)
|
|
190
|
-
return wrap_expr(self._pyexpr.arr_tail(n_pyexpr, as_array))
|
|
191
|
-
|
|
192
|
-
def min(self) -> Expr:
|
|
193
|
-
"""
|
|
194
|
-
Compute the min values of the sub-arrays.
|
|
195
|
-
|
|
196
|
-
Examples
|
|
197
|
-
--------
|
|
198
|
-
>>> df = pl.DataFrame(
|
|
199
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
200
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
201
|
-
... )
|
|
202
|
-
>>> df.select(pl.col("a").arr.min())
|
|
203
|
-
shape: (2, 1)
|
|
204
|
-
┌─────┐
|
|
205
|
-
│ a │
|
|
206
|
-
│ --- │
|
|
207
|
-
│ i64 │
|
|
208
|
-
╞═════╡
|
|
209
|
-
│ 1 │
|
|
210
|
-
│ 3 │
|
|
211
|
-
└─────┘
|
|
212
|
-
"""
|
|
213
|
-
return wrap_expr(self._pyexpr.arr_min())
|
|
214
|
-
|
|
215
|
-
def max(self) -> Expr:
|
|
216
|
-
"""
|
|
217
|
-
Compute the max values of the sub-arrays.
|
|
218
|
-
|
|
219
|
-
Examples
|
|
220
|
-
--------
|
|
221
|
-
>>> df = pl.DataFrame(
|
|
222
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
223
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
224
|
-
... )
|
|
225
|
-
>>> df.select(pl.col("a").arr.max())
|
|
226
|
-
shape: (2, 1)
|
|
227
|
-
┌─────┐
|
|
228
|
-
│ a │
|
|
229
|
-
│ --- │
|
|
230
|
-
│ i64 │
|
|
231
|
-
╞═════╡
|
|
232
|
-
│ 2 │
|
|
233
|
-
│ 4 │
|
|
234
|
-
└─────┘
|
|
235
|
-
"""
|
|
236
|
-
return wrap_expr(self._pyexpr.arr_max())
|
|
237
|
-
|
|
238
|
-
def sum(self) -> Expr:
|
|
239
|
-
"""
|
|
240
|
-
Compute the sum values of the sub-arrays.
|
|
241
|
-
|
|
242
|
-
Examples
|
|
243
|
-
--------
|
|
244
|
-
>>> df = pl.DataFrame(
|
|
245
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
246
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
247
|
-
... )
|
|
248
|
-
>>> df.select(pl.col("a").arr.sum())
|
|
249
|
-
shape: (2, 1)
|
|
250
|
-
┌─────┐
|
|
251
|
-
│ a │
|
|
252
|
-
│ --- │
|
|
253
|
-
│ i64 │
|
|
254
|
-
╞═════╡
|
|
255
|
-
│ 3 │
|
|
256
|
-
│ 7 │
|
|
257
|
-
└─────┘
|
|
258
|
-
"""
|
|
259
|
-
return wrap_expr(self._pyexpr.arr_sum())
|
|
260
|
-
|
|
261
|
-
def std(self, ddof: int = 1) -> Expr:
|
|
262
|
-
"""
|
|
263
|
-
Compute the std of the values of the sub-arrays.
|
|
264
|
-
|
|
265
|
-
Examples
|
|
266
|
-
--------
|
|
267
|
-
>>> df = pl.DataFrame(
|
|
268
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
269
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
270
|
-
... )
|
|
271
|
-
>>> df.select(pl.col("a").arr.std())
|
|
272
|
-
shape: (2, 1)
|
|
273
|
-
┌──────────┐
|
|
274
|
-
│ a │
|
|
275
|
-
│ --- │
|
|
276
|
-
│ f64 │
|
|
277
|
-
╞══════════╡
|
|
278
|
-
│ 0.707107 │
|
|
279
|
-
│ 0.707107 │
|
|
280
|
-
└──────────┘
|
|
281
|
-
"""
|
|
282
|
-
return wrap_expr(self._pyexpr.arr_std(ddof))
|
|
283
|
-
|
|
284
|
-
def var(self, ddof: int = 1) -> Expr:
|
|
285
|
-
"""
|
|
286
|
-
Compute the var of the values of the sub-arrays.
|
|
287
|
-
|
|
288
|
-
Examples
|
|
289
|
-
--------
|
|
290
|
-
>>> df = pl.DataFrame(
|
|
291
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
292
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
293
|
-
... )
|
|
294
|
-
>>> df.select(pl.col("a").arr.var())
|
|
295
|
-
shape: (2, 1)
|
|
296
|
-
┌─────┐
|
|
297
|
-
│ a │
|
|
298
|
-
│ --- │
|
|
299
|
-
│ f64 │
|
|
300
|
-
╞═════╡
|
|
301
|
-
│ 0.5 │
|
|
302
|
-
│ 0.5 │
|
|
303
|
-
└─────┘
|
|
304
|
-
"""
|
|
305
|
-
return wrap_expr(self._pyexpr.arr_var(ddof))
|
|
306
|
-
|
|
307
|
-
def mean(self) -> Expr:
|
|
308
|
-
"""
|
|
309
|
-
Compute the mean of the values of the sub-arrays.
|
|
310
|
-
|
|
311
|
-
Examples
|
|
312
|
-
--------
|
|
313
|
-
>>> df = pl.DataFrame(
|
|
314
|
-
... data={"a": [[1, 2, 3], [1, 1, 16]]},
|
|
315
|
-
... schema={"a": pl.Array(pl.Int64, 3)},
|
|
316
|
-
... )
|
|
317
|
-
>>> df.select(pl.col("a").arr.mean())
|
|
318
|
-
shape: (2, 1)
|
|
319
|
-
┌─────┐
|
|
320
|
-
│ a │
|
|
321
|
-
│ --- │
|
|
322
|
-
│ f64 │
|
|
323
|
-
╞═════╡
|
|
324
|
-
│ 2.0 │
|
|
325
|
-
│ 6.0 │
|
|
326
|
-
└─────┘
|
|
327
|
-
"""
|
|
328
|
-
return wrap_expr(self._pyexpr.arr_mean())
|
|
329
|
-
|
|
330
|
-
def median(self) -> Expr:
|
|
331
|
-
"""
|
|
332
|
-
Compute the median of the values of the sub-arrays.
|
|
333
|
-
|
|
334
|
-
Examples
|
|
335
|
-
--------
|
|
336
|
-
>>> df = pl.DataFrame(
|
|
337
|
-
... data={"a": [[1, 2], [4, 3]]},
|
|
338
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
339
|
-
... )
|
|
340
|
-
>>> df.select(pl.col("a").arr.median())
|
|
341
|
-
shape: (2, 1)
|
|
342
|
-
┌─────┐
|
|
343
|
-
│ a │
|
|
344
|
-
│ --- │
|
|
345
|
-
│ f64 │
|
|
346
|
-
╞═════╡
|
|
347
|
-
│ 1.5 │
|
|
348
|
-
│ 3.5 │
|
|
349
|
-
└─────┘
|
|
350
|
-
"""
|
|
351
|
-
return wrap_expr(self._pyexpr.arr_median())
|
|
352
|
-
|
|
353
|
-
def unique(self, *, maintain_order: bool = False) -> Expr:
|
|
354
|
-
"""
|
|
355
|
-
Get the unique/distinct values in the array.
|
|
356
|
-
|
|
357
|
-
Parameters
|
|
358
|
-
----------
|
|
359
|
-
maintain_order
|
|
360
|
-
Maintain order of data. This requires more work.
|
|
361
|
-
|
|
362
|
-
Examples
|
|
363
|
-
--------
|
|
364
|
-
>>> df = pl.DataFrame(
|
|
365
|
-
... {
|
|
366
|
-
... "a": [[1, 1, 2]],
|
|
367
|
-
... },
|
|
368
|
-
... schema={"a": pl.Array(pl.Int64, 3)},
|
|
369
|
-
... )
|
|
370
|
-
>>> df.select(pl.col("a").arr.unique())
|
|
371
|
-
shape: (1, 1)
|
|
372
|
-
┌───────────┐
|
|
373
|
-
│ a │
|
|
374
|
-
│ --- │
|
|
375
|
-
│ list[i64] │
|
|
376
|
-
╞═══════════╡
|
|
377
|
-
│ [1, 2] │
|
|
378
|
-
└───────────┘
|
|
379
|
-
"""
|
|
380
|
-
return wrap_expr(self._pyexpr.arr_unique(maintain_order))
|
|
381
|
-
|
|
382
|
-
def n_unique(self) -> Expr:
|
|
383
|
-
"""
|
|
384
|
-
Count the number of unique values in every sub-arrays.
|
|
385
|
-
|
|
386
|
-
Examples
|
|
387
|
-
--------
|
|
388
|
-
>>> df = pl.DataFrame(
|
|
389
|
-
... {
|
|
390
|
-
... "a": [[1, 1, 2], [2, 3, 4]],
|
|
391
|
-
... },
|
|
392
|
-
... schema={"a": pl.Array(pl.Int64, 3)},
|
|
393
|
-
... )
|
|
394
|
-
>>> df.with_columns(n_unique=pl.col("a").arr.n_unique())
|
|
395
|
-
shape: (2, 2)
|
|
396
|
-
┌───────────────┬──────────┐
|
|
397
|
-
│ a ┆ n_unique │
|
|
398
|
-
│ --- ┆ --- │
|
|
399
|
-
│ array[i64, 3] ┆ u32 │
|
|
400
|
-
╞═══════════════╪══════════╡
|
|
401
|
-
│ [1, 1, 2] ┆ 2 │
|
|
402
|
-
│ [2, 3, 4] ┆ 3 │
|
|
403
|
-
└───────────────┴──────────┘
|
|
404
|
-
"""
|
|
405
|
-
return wrap_expr(self._pyexpr.arr_n_unique())
|
|
406
|
-
|
|
407
|
-
def to_list(self) -> Expr:
|
|
408
|
-
"""
|
|
409
|
-
Convert an Array column into a List column with the same inner data type.
|
|
410
|
-
|
|
411
|
-
Returns
|
|
412
|
-
-------
|
|
413
|
-
Expr
|
|
414
|
-
Expression of data type :class:`List`.
|
|
415
|
-
|
|
416
|
-
Examples
|
|
417
|
-
--------
|
|
418
|
-
>>> df = pl.DataFrame(
|
|
419
|
-
... data={"a": [[1, 2], [3, 4]]},
|
|
420
|
-
... schema={"a": pl.Array(pl.Int8, 2)},
|
|
421
|
-
... )
|
|
422
|
-
>>> df.select(pl.col("a").arr.to_list())
|
|
423
|
-
shape: (2, 1)
|
|
424
|
-
┌──────────┐
|
|
425
|
-
│ a │
|
|
426
|
-
│ --- │
|
|
427
|
-
│ list[i8] │
|
|
428
|
-
╞══════════╡
|
|
429
|
-
│ [1, 2] │
|
|
430
|
-
│ [3, 4] │
|
|
431
|
-
└──────────┘
|
|
432
|
-
"""
|
|
433
|
-
return wrap_expr(self._pyexpr.arr_to_list())
|
|
434
|
-
|
|
435
|
-
def any(self) -> Expr:
|
|
436
|
-
"""
|
|
437
|
-
Evaluate whether any boolean value is true for every subarray.
|
|
438
|
-
|
|
439
|
-
Examples
|
|
440
|
-
--------
|
|
441
|
-
>>> df = pl.DataFrame(
|
|
442
|
-
... data={
|
|
443
|
-
... "a": [
|
|
444
|
-
... [True, True],
|
|
445
|
-
... [False, True],
|
|
446
|
-
... [False, False],
|
|
447
|
-
... [None, None],
|
|
448
|
-
... None,
|
|
449
|
-
... ]
|
|
450
|
-
... },
|
|
451
|
-
... schema={"a": pl.Array(pl.Boolean, 2)},
|
|
452
|
-
... )
|
|
453
|
-
>>> df.with_columns(any=pl.col("a").arr.any())
|
|
454
|
-
shape: (5, 2)
|
|
455
|
-
┌────────────────┬───────┐
|
|
456
|
-
│ a ┆ any │
|
|
457
|
-
│ --- ┆ --- │
|
|
458
|
-
│ array[bool, 2] ┆ bool │
|
|
459
|
-
╞════════════════╪═══════╡
|
|
460
|
-
│ [true, true] ┆ true │
|
|
461
|
-
│ [false, true] ┆ true │
|
|
462
|
-
│ [false, false] ┆ false │
|
|
463
|
-
│ [null, null] ┆ false │
|
|
464
|
-
│ null ┆ null │
|
|
465
|
-
└────────────────┴───────┘
|
|
466
|
-
"""
|
|
467
|
-
return wrap_expr(self._pyexpr.arr_any())
|
|
468
|
-
|
|
469
|
-
def all(self) -> Expr:
|
|
470
|
-
"""
|
|
471
|
-
Evaluate whether all boolean values are true for every subarray.
|
|
472
|
-
|
|
473
|
-
Examples
|
|
474
|
-
--------
|
|
475
|
-
>>> df = pl.DataFrame(
|
|
476
|
-
... data={
|
|
477
|
-
... "a": [
|
|
478
|
-
... [True, True],
|
|
479
|
-
... [False, True],
|
|
480
|
-
... [False, False],
|
|
481
|
-
... [None, None],
|
|
482
|
-
... None,
|
|
483
|
-
... ]
|
|
484
|
-
... },
|
|
485
|
-
... schema={"a": pl.Array(pl.Boolean, 2)},
|
|
486
|
-
... )
|
|
487
|
-
>>> df.with_columns(all=pl.col("a").arr.all())
|
|
488
|
-
shape: (5, 2)
|
|
489
|
-
┌────────────────┬───────┐
|
|
490
|
-
│ a ┆ all │
|
|
491
|
-
│ --- ┆ --- │
|
|
492
|
-
│ array[bool, 2] ┆ bool │
|
|
493
|
-
╞════════════════╪═══════╡
|
|
494
|
-
│ [true, true] ┆ true │
|
|
495
|
-
│ [false, true] ┆ false │
|
|
496
|
-
│ [false, false] ┆ false │
|
|
497
|
-
│ [null, null] ┆ true │
|
|
498
|
-
│ null ┆ null │
|
|
499
|
-
└────────────────┴───────┘
|
|
500
|
-
"""
|
|
501
|
-
return wrap_expr(self._pyexpr.arr_all())
|
|
502
|
-
|
|
503
|
-
def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
|
|
504
|
-
"""
|
|
505
|
-
Sort the arrays in this column.
|
|
506
|
-
|
|
507
|
-
Parameters
|
|
508
|
-
----------
|
|
509
|
-
descending
|
|
510
|
-
Sort in descending order.
|
|
511
|
-
nulls_last
|
|
512
|
-
Place null values last.
|
|
513
|
-
|
|
514
|
-
Examples
|
|
515
|
-
--------
|
|
516
|
-
>>> df = pl.DataFrame(
|
|
517
|
-
... {
|
|
518
|
-
... "a": [[3, 2, 1], [9, 1, 2]],
|
|
519
|
-
... },
|
|
520
|
-
... schema={"a": pl.Array(pl.Int64, 3)},
|
|
521
|
-
... )
|
|
522
|
-
>>> df.with_columns(sort=pl.col("a").arr.sort())
|
|
523
|
-
shape: (2, 2)
|
|
524
|
-
┌───────────────┬───────────────┐
|
|
525
|
-
│ a ┆ sort │
|
|
526
|
-
│ --- ┆ --- │
|
|
527
|
-
│ array[i64, 3] ┆ array[i64, 3] │
|
|
528
|
-
╞═══════════════╪═══════════════╡
|
|
529
|
-
│ [3, 2, 1] ┆ [1, 2, 3] │
|
|
530
|
-
│ [9, 1, 2] ┆ [1, 2, 9] │
|
|
531
|
-
└───────────────┴───────────────┘
|
|
532
|
-
>>> df.with_columns(sort=pl.col("a").arr.sort(descending=True))
|
|
533
|
-
shape: (2, 2)
|
|
534
|
-
┌───────────────┬───────────────┐
|
|
535
|
-
│ a ┆ sort │
|
|
536
|
-
│ --- ┆ --- │
|
|
537
|
-
│ array[i64, 3] ┆ array[i64, 3] │
|
|
538
|
-
╞═══════════════╪═══════════════╡
|
|
539
|
-
│ [3, 2, 1] ┆ [3, 2, 1] │
|
|
540
|
-
│ [9, 1, 2] ┆ [9, 2, 1] │
|
|
541
|
-
└───────────────┴───────────────┘
|
|
542
|
-
"""
|
|
543
|
-
return wrap_expr(self._pyexpr.arr_sort(descending, nulls_last))
|
|
544
|
-
|
|
545
|
-
def reverse(self) -> Expr:
|
|
546
|
-
"""
|
|
547
|
-
Reverse the arrays in this column.
|
|
548
|
-
|
|
549
|
-
Examples
|
|
550
|
-
--------
|
|
551
|
-
>>> df = pl.DataFrame(
|
|
552
|
-
... {
|
|
553
|
-
... "a": [[3, 2, 1], [9, 1, 2]],
|
|
554
|
-
... },
|
|
555
|
-
... schema={"a": pl.Array(pl.Int64, 3)},
|
|
556
|
-
... )
|
|
557
|
-
>>> df.with_columns(reverse=pl.col("a").arr.reverse())
|
|
558
|
-
shape: (2, 2)
|
|
559
|
-
┌───────────────┬───────────────┐
|
|
560
|
-
│ a ┆ reverse │
|
|
561
|
-
│ --- ┆ --- │
|
|
562
|
-
│ array[i64, 3] ┆ array[i64, 3] │
|
|
563
|
-
╞═══════════════╪═══════════════╡
|
|
564
|
-
│ [3, 2, 1] ┆ [1, 2, 3] │
|
|
565
|
-
│ [9, 1, 2] ┆ [2, 1, 9] │
|
|
566
|
-
└───────────────┴───────────────┘
|
|
567
|
-
"""
|
|
568
|
-
return wrap_expr(self._pyexpr.arr_reverse())
|
|
569
|
-
|
|
570
|
-
def arg_min(self) -> Expr:
|
|
571
|
-
"""
|
|
572
|
-
Retrieve the index of the minimal value in every sub-array.
|
|
573
|
-
|
|
574
|
-
Returns
|
|
575
|
-
-------
|
|
576
|
-
Expr
|
|
577
|
-
Expression of data type :class:`UInt32` or :class:`UInt64`
|
|
578
|
-
(depending on compilation).
|
|
579
|
-
|
|
580
|
-
Examples
|
|
581
|
-
--------
|
|
582
|
-
>>> df = pl.DataFrame(
|
|
583
|
-
... {
|
|
584
|
-
... "a": [[1, 2], [2, 1]],
|
|
585
|
-
... },
|
|
586
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
587
|
-
... )
|
|
588
|
-
>>> df.with_columns(arg_min=pl.col("a").arr.arg_min())
|
|
589
|
-
shape: (2, 2)
|
|
590
|
-
┌───────────────┬─────────┐
|
|
591
|
-
│ a ┆ arg_min │
|
|
592
|
-
│ --- ┆ --- │
|
|
593
|
-
│ array[i64, 2] ┆ u32 │
|
|
594
|
-
╞═══════════════╪═════════╡
|
|
595
|
-
│ [1, 2] ┆ 0 │
|
|
596
|
-
│ [2, 1] ┆ 1 │
|
|
597
|
-
└───────────────┴─────────┘
|
|
598
|
-
"""
|
|
599
|
-
return wrap_expr(self._pyexpr.arr_arg_min())
|
|
600
|
-
|
|
601
|
-
def arg_max(self) -> Expr:
|
|
602
|
-
"""
|
|
603
|
-
Retrieve the index of the maximum value in every sub-array.
|
|
604
|
-
|
|
605
|
-
Returns
|
|
606
|
-
-------
|
|
607
|
-
Expr
|
|
608
|
-
Expression of data type :class:`UInt32` or :class:`UInt64`
|
|
609
|
-
(depending on compilation).
|
|
610
|
-
|
|
611
|
-
Examples
|
|
612
|
-
--------
|
|
613
|
-
>>> df = pl.DataFrame(
|
|
614
|
-
... {
|
|
615
|
-
... "a": [[1, 2], [2, 1]],
|
|
616
|
-
... },
|
|
617
|
-
... schema={"a": pl.Array(pl.Int64, 2)},
|
|
618
|
-
... )
|
|
619
|
-
>>> df.with_columns(arg_max=pl.col("a").arr.arg_max())
|
|
620
|
-
shape: (2, 2)
|
|
621
|
-
┌───────────────┬─────────┐
|
|
622
|
-
│ a ┆ arg_max │
|
|
623
|
-
│ --- ┆ --- │
|
|
624
|
-
│ array[i64, 2] ┆ u32 │
|
|
625
|
-
╞═══════════════╪═════════╡
|
|
626
|
-
│ [1, 2] ┆ 1 │
|
|
627
|
-
│ [2, 1] ┆ 0 │
|
|
628
|
-
└───────────────┴─────────┘
|
|
629
|
-
"""
|
|
630
|
-
return wrap_expr(self._pyexpr.arr_arg_max())
|
|
631
|
-
|
|
632
|
-
def get(self, index: int | IntoExprColumn, *, null_on_oob: bool = False) -> Expr:
|
|
633
|
-
"""
|
|
634
|
-
Get the value by index in the sub-arrays.
|
|
635
|
-
|
|
636
|
-
So index `0` would return the first item of every sublist
|
|
637
|
-
and index `-1` would return the last item of every sublist
|
|
638
|
-
if an index is out of bounds, it will return a `None`.
|
|
639
|
-
|
|
640
|
-
Parameters
|
|
641
|
-
----------
|
|
642
|
-
index
|
|
643
|
-
Index to return per sub-array
|
|
644
|
-
null_on_oob
|
|
645
|
-
Behavior if an index is out of bounds:
|
|
646
|
-
True -> set as null
|
|
647
|
-
False -> raise an error
|
|
648
|
-
|
|
649
|
-
Examples
|
|
650
|
-
--------
|
|
651
|
-
>>> df = pl.DataFrame(
|
|
652
|
-
... {"arr": [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx": [1, -2, 0]},
|
|
653
|
-
... schema={"arr": pl.Array(pl.Int32, 3), "idx": pl.Int32},
|
|
654
|
-
... )
|
|
655
|
-
>>> df.with_columns(get=pl.col("arr").arr.get("idx", null_on_oob=True))
|
|
656
|
-
shape: (3, 3)
|
|
657
|
-
┌───────────────┬─────┬─────┐
|
|
658
|
-
│ arr ┆ idx ┆ get │
|
|
659
|
-
│ --- ┆ --- ┆ --- │
|
|
660
|
-
│ array[i32, 3] ┆ i32 ┆ i32 │
|
|
661
|
-
╞═══════════════╪═════╪═════╡
|
|
662
|
-
│ [1, 2, 3] ┆ 1 ┆ 2 │
|
|
663
|
-
│ [4, 5, 6] ┆ -2 ┆ 5 │
|
|
664
|
-
│ [7, 8, 9] ┆ 0 ┆ 7 │
|
|
665
|
-
└───────────────┴─────┴─────┘
|
|
666
|
-
"""
|
|
667
|
-
index_pyexpr = parse_into_expression(index)
|
|
668
|
-
return wrap_expr(self._pyexpr.arr_get(index_pyexpr, null_on_oob))
|
|
669
|
-
|
|
670
|
-
def first(self) -> Expr:
|
|
671
|
-
"""
|
|
672
|
-
Get the first value of the sub-arrays.
|
|
673
|
-
|
|
674
|
-
Examples
|
|
675
|
-
--------
|
|
676
|
-
>>> df = pl.DataFrame(
|
|
677
|
-
... {"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]},
|
|
678
|
-
... schema={"a": pl.Array(pl.Int32, 3)},
|
|
679
|
-
... )
|
|
680
|
-
>>> df.with_columns(first=pl.col("a").arr.first())
|
|
681
|
-
shape: (3, 2)
|
|
682
|
-
┌───────────────┬───────┐
|
|
683
|
-
│ a ┆ first │
|
|
684
|
-
│ --- ┆ --- │
|
|
685
|
-
│ array[i32, 3] ┆ i32 │
|
|
686
|
-
╞═══════════════╪═══════╡
|
|
687
|
-
│ [1, 2, 3] ┆ 1 │
|
|
688
|
-
│ [4, 5, 6] ┆ 4 │
|
|
689
|
-
│ [7, 8, 9] ┆ 7 │
|
|
690
|
-
└───────────────┴───────┘
|
|
691
|
-
"""
|
|
692
|
-
return self.get(0, null_on_oob=True)
|
|
693
|
-
|
|
694
|
-
def last(self) -> Expr:
|
|
695
|
-
"""
|
|
696
|
-
Get the last value of the sub-arrays.
|
|
697
|
-
|
|
698
|
-
Examples
|
|
699
|
-
--------
|
|
700
|
-
>>> df = pl.DataFrame(
|
|
701
|
-
... {"a": [[1, 2, 3], [4, 5, 6], [7, 9, 8]]},
|
|
702
|
-
... schema={"a": pl.Array(pl.Int32, 3)},
|
|
703
|
-
... )
|
|
704
|
-
>>> df.with_columns(last=pl.col("a").arr.last())
|
|
705
|
-
shape: (3, 2)
|
|
706
|
-
┌───────────────┬──────┐
|
|
707
|
-
│ a ┆ last │
|
|
708
|
-
│ --- ┆ --- │
|
|
709
|
-
│ array[i32, 3] ┆ i32 │
|
|
710
|
-
╞═══════════════╪══════╡
|
|
711
|
-
│ [1, 2, 3] ┆ 3 │
|
|
712
|
-
│ [4, 5, 6] ┆ 6 │
|
|
713
|
-
│ [7, 9, 8] ┆ 8 │
|
|
714
|
-
└───────────────┴──────┘
|
|
715
|
-
"""
|
|
716
|
-
return self.get(-1, null_on_oob=True)
|
|
717
|
-
|
|
718
|
-
def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Expr:
|
|
719
|
-
"""
|
|
720
|
-
Join all string items in a sub-array and place a separator between them.
|
|
721
|
-
|
|
722
|
-
This errors if inner type of array `!= String`.
|
|
723
|
-
|
|
724
|
-
Parameters
|
|
725
|
-
----------
|
|
726
|
-
separator
|
|
727
|
-
string to separate the items with
|
|
728
|
-
ignore_nulls
|
|
729
|
-
Ignore null values (default).
|
|
730
|
-
|
|
731
|
-
If set to ``False``, null values will be propagated.
|
|
732
|
-
If the sub-list contains any null values, the output is ``None``.
|
|
733
|
-
|
|
734
|
-
Returns
|
|
735
|
-
-------
|
|
736
|
-
Expr
|
|
737
|
-
Expression of data type :class:`String`.
|
|
738
|
-
|
|
739
|
-
Examples
|
|
740
|
-
--------
|
|
741
|
-
>>> df = pl.DataFrame(
|
|
742
|
-
... {"s": [["a", "b"], ["x", "y"]], "separator": ["*", "_"]},
|
|
743
|
-
... schema={
|
|
744
|
-
... "s": pl.Array(pl.String, 2),
|
|
745
|
-
... "separator": pl.String,
|
|
746
|
-
... },
|
|
747
|
-
... )
|
|
748
|
-
>>> df.with_columns(join=pl.col("s").arr.join(pl.col("separator")))
|
|
749
|
-
shape: (2, 3)
|
|
750
|
-
┌───────────────┬───────────┬──────┐
|
|
751
|
-
│ s ┆ separator ┆ join │
|
|
752
|
-
│ --- ┆ --- ┆ --- │
|
|
753
|
-
│ array[str, 2] ┆ str ┆ str │
|
|
754
|
-
╞═══════════════╪═══════════╪══════╡
|
|
755
|
-
│ ["a", "b"] ┆ * ┆ a*b │
|
|
756
|
-
│ ["x", "y"] ┆ _ ┆ x_y │
|
|
757
|
-
└───────────────┴───────────┴──────┘
|
|
758
|
-
"""
|
|
759
|
-
separator_pyexpr = parse_into_expression(separator, str_as_lit=True)
|
|
760
|
-
return wrap_expr(self._pyexpr.arr_join(separator_pyexpr, ignore_nulls))
|
|
761
|
-
|
|
762
|
-
def explode(self) -> Expr:
    """
    Unnest the arrays so that every array element gets its own row.

    Returns
    -------
    Expr
        Expression with the data type of the array elements.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6]]}, schema={"a": pl.Array(pl.Int64, 3)}
    ... )
    >>> df.select(pl.col("a").arr.explode())
    shape: (6, 1)
    ┌─────┐
    │ a   │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 1   │
    │ 2   │
    │ 3   │
    │ 4   │
    │ 5   │
    │ 6   │
    └─────┘
    """
    exploded = self._pyexpr.arr_explode()
    return wrap_expr(exploded)
|
|
792
|
-
|
|
793
|
-
def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Expr:
    """
    Test, for every sub-array, whether it contains the given item.

    Parameters
    ----------
    item
        Item that will be checked for membership.
    nulls_equal : bool, default True
        If True, treat null as a distinct value. Null values will not propagate.

    Returns
    -------
    Expr
        Expression of data type :class:`Boolean`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [["a", "b"], ["x", "y"], ["a", "c"]]},
    ...     schema={"a": pl.Array(pl.String, 2)},
    ... )
    >>> df.with_columns(contains=pl.col("a").arr.contains("a"))
    shape: (3, 2)
    ┌───────────────┬──────────┐
    │ a             ┆ contains │
    │ ---           ┆ ---      │
    │ array[str, 2] ┆ bool     │
    ╞═══════════════╪══════════╡
    │ ["a", "b"]    ┆ true     │
    │ ["x", "y"]    ┆ false    │
    │ ["a", "c"]    ┆ true     │
    └───────────────┴──────────┘
    """
    # Normalise the probe value into a native expression before delegating.
    needle = parse_into_expression(item, str_as_lit=True)
    membership = self._pyexpr.arr_contains(needle, nulls_equal)
    return wrap_expr(membership)
|
|
829
|
-
|
|
830
|
-
def count_matches(self, element: IntoExpr) -> Expr:
    """
    Count the occurrences of the value produced by `element` in each sub-array.

    Parameters
    ----------
    element
        An expression that produces a single value.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2], [1, 1], [2, 2]]}, schema={"a": pl.Array(pl.Int64, 2)}
    ... )
    >>> df.with_columns(number_of_twos=pl.col("a").arr.count_matches(2))
    shape: (3, 2)
    ┌───────────────┬────────────────┐
    │ a             ┆ number_of_twos │
    │ ---           ┆ ---            │
    │ array[i64, 2] ┆ u32            │
    ╞═══════════════╪════════════════╡
    │ [1, 2]        ┆ 1              │
    │ [1, 1]        ┆ 0              │
    │ [2, 2]        ┆ 2              │
    └───────────────┴────────────────┘
    """
    # Normalise the target value into a native expression before delegating.
    target = parse_into_expression(element, str_as_lit=True)
    counted = self._pyexpr.arr_count_matches(target)
    return wrap_expr(counted)
|
|
858
|
-
|
|
859
|
-
def to_struct(
    self, fields: Sequence[str] | Callable[[int], str] | None = None
) -> Expr:
    """
    Convert an expression of type `Array` into one of type `Struct`.

    Parameters
    ----------
    fields
        If the name and number of the desired fields is known in advance,
        a list of field names can be given, which will be assigned by index.
        Otherwise, to dynamically assign field names, a custom function can be
        used; if neither are set, fields will be `field_0, field_1 .. field_n`.

    Examples
    --------
    Convert array to struct with default field name assignment:

    >>> df = pl.DataFrame(
    ...     {"n": [[0, 1, 2], [3, 4, 5]]}, schema={"n": pl.Array(pl.Int8, 3)}
    ... )
    >>> df.with_columns(struct=pl.col("n").arr.to_struct())
    shape: (2, 2)
    ┌──────────────┬───────────┐
    │ n            ┆ struct    │
    │ ---          ┆ ---       │
    │ array[i8, 3] ┆ struct[3] │
    ╞══════════════╪═══════════╡
    │ [0, 1, 2]    ┆ {0,1,2}   │
    │ [3, 4, 5]    ┆ {3,4,5}   │
    └──────────────┴───────────┘

    Convert array to struct with field name assignment by function/index:

    >>> df = pl.DataFrame(
    ...     {"n": [[0, 1, 2], [3, 4, 5]]}, schema={"n": pl.Array(pl.Int8, 3)}
    ... )
    >>> df.select(pl.col("n").arr.to_struct(fields=lambda idx: f"n{idx}")).rows(
    ...     named=True
    ... )
    [{'n': {'n0': 0, 'n1': 1, 'n2': 2}}, {'n': {'n0': 3, 'n1': 4, 'n2': 5}}]

    Convert array to struct with field name assignment by
    index from a list of names:

    >>> df.select(pl.col("n").arr.to_struct(fields=["c1", "c2", "c3"])).rows(
    ...     named=True
    ... )
    [{'n': {'c1': 0, 'c2': 1, 'c3': 2}}, {'n': {'c1': 3, 'c2': 4, 'c3': 5}}]
    """
    # Callable or None: hand the argument straight to the native conversion.
    if not isinstance(fields, Sequence):
        return wrap_expr(self._pyexpr.arr_to_struct(fields))

    # Explicit names: build the struct with default field names first, then
    # rename the fields by index.
    # NOTE(review): a bare `str` also satisfies `Sequence`, so it would be
    # split into single-character names here - confirm this is intended.
    names = list(fields)
    default_named = self._pyexpr.arr_to_struct(None)
    return wrap_expr(default_named).struct.rename_fields(names)
|
|
916
|
-
|
|
917
|
-
def shift(self, n: int | IntoExprColumn = 1) -> Expr:
    """
    Shift the values inside each array by the given number of indices.

    Parameters
    ----------
    n
        Number of indices to shift forward. If a negative value is passed, values
        are shifted in the opposite direction instead.

    Notes
    -----
    This method is similar to the `LAG` operation in SQL when the value for `n`
    is positive. With a negative value for `n`, it is similar to `LEAD`.

    Examples
    --------
    By default, array values are shifted forward by one index.

    >>> df = pl.DataFrame(
    ...     {"a": [[1, 2, 3], [4, 5, 6]]}, schema={"a": pl.Array(pl.Int64, 3)}
    ... )
    >>> df.with_columns(shift=pl.col("a").arr.shift())
    shape: (2, 2)
    ┌───────────────┬───────────────┐
    │ a             ┆ shift         │
    │ ---           ┆ ---           │
    │ array[i64, 3] ┆ array[i64, 3] │
    ╞═══════════════╪═══════════════╡
    │ [1, 2, 3]     ┆ [null, 1, 2]  │
    │ [4, 5, 6]     ┆ [null, 4, 5]  │
    └───────────────┴───────────────┘

    Pass a negative value to shift in the opposite direction instead.

    >>> df.with_columns(shift=pl.col("a").arr.shift(-2))
    shape: (2, 2)
    ┌───────────────┬─────────────────┐
    │ a             ┆ shift           │
    │ ---           ┆ ---             │
    │ array[i64, 3] ┆ array[i64, 3]   │
    ╞═══════════════╪═════════════════╡
    │ [1, 2, 3]     ┆ [3, null, null] │
    │ [4, 5, 6]     ┆ [6, null, null] │
    └───────────────┴─────────────────┘
    """
    # Normalise the offset into a native expression before delegating.
    offset = parse_into_expression(n)
    shifted = self._pyexpr.arr_shift(offset)
    return wrap_expr(shifted)
|