polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/expr/struct.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from polars._utils.parse import parse_into_list_of_expressions
|
|
7
|
+
from polars._utils.various import qualified_type_name
|
|
8
|
+
from polars._utils.wrap import wrap_expr
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from collections.abc import Iterable, Sequence
|
|
12
|
+
|
|
13
|
+
from polars import Expr
|
|
14
|
+
from polars._typing import IntoExpr
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ExprStructNameSpace:
    """Expression namespace holding the struct-related operations."""

    _accessor = "struct"

    def __init__(self, expr: Expr) -> None:
        # Only the underlying PyExpr handle of the wrapped expression is kept.
        self._pyexpr = expr._pyexpr

    def __getitem__(self, item: str | int) -> Expr:
        """
        Select one struct field, by name (`str`) or by position (`int`).

        Raises
        ------
        TypeError
            If `item` is neither a `str` nor an `int`.
        """
        # Names are delegated to `field`; integers are resolved by index.
        if isinstance(item, str):
            return self.field(item)
        if isinstance(item, int):
            by_index = self._pyexpr.struct_field_by_index(item)
            return wrap_expr(by_index)

        type_name = qualified_type_name(item)
        raise TypeError(f"expected type 'int | str', got {type_name!r} ({item!r})")

    def field(self, name: str | list[str], *more_names: str) -> Expr:
        """
        Retrieve one or multiple `Struct` field(s) as a new expression.

        Parameters
        ----------
        name
            Name of the struct field to retrieve; a list of names is also accepted.
        *more_names
            Additional struct field names.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘

        Retrieve struct field(s):

        >>> df.select(pl.col("struct_col").struct.field("bbb"))
        shape: (2, 1)
        ┌─────┐
        │ bbb │
        │ --- │
        │ str │
        ╞═════╡
        │ ab  │
        │ cd  │
        └─────┘

        >>> df.select(
        ...     pl.col("struct_col").struct.field("bbb"),
        ...     pl.col("struct_col").struct.field("ddd"),
        ... )
        shape: (2, 2)
        ┌─────┬───────────┐
        │ bbb ┆ ddd       │
        │ --- ┆ ---       │
        │ str ┆ list[i64] │
        ╞═════╪═══════════╡
        │ ab  ┆ [1, 2]    │
        │ cd  ┆ [3]       │
        └─────┴───────────┘

        Use wildcard expansion:

        >>> df.select(pl.col("struct_col").struct.field("*"))
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Retrieve multiple fields by name:

        >>> df.select(pl.col("struct_col").struct.field("aaa", "bbb"))
        shape: (2, 2)
        ┌─────┬─────┐
        │ aaa ┆ bbb │
        │ --- ┆ --- │
        │ i64 ┆ str │
        ╞═════╪═════╡
        │ 1   ┆ ab  │
        │ 2   ┆ cd  │
        └─────┴─────┘

        Retrieve multiple fields by regex expansion:

        >>> df.select(pl.col("struct_col").struct.field("^a.*|b.*$"))
        shape: (2, 2)
        ┌─────┬─────┐
        │ aaa ┆ bbb │
        │ --- ┆ --- │
        │ i64 ┆ str │
        ╞═════╪═════╡
        │ 1   ┆ ab  │
        │ 2   ┆ cd  │
        └─────┴─────┘

        Notes
        -----
        The `struct` namespace also implements `__getitem__`,
        so fields can be accessed by index as well:

        >>> df.select(pl.col("struct_col").struct[1])
        shape: (2, 1)
        ┌─────┐
        │ bbb │
        │ --- │
        │ str │
        ╞═════╡
        │ ab  │
        │ cd  │
        └─────┘
        """
        selection = name
        if more_names:
            # Merge the leading argument (one name or a list) with the extras.
            leading = [selection] if isinstance(selection, str) else selection
            selection = [*leading, *more_names]

        # A single (non-list) name takes the by-name lookup; lists go through
        # the multi-field lookup.
        if not isinstance(selection, list):
            return wrap_expr(self._pyexpr.struct_field_by_name(selection))
        return wrap_expr(self._pyexpr.struct_multiple_fields(selection))

    def unnest(self) -> Expr:
        """
        Expand the struct into its individual fields.

        Alias for `Expr.struct.field("*")`.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘
        >>> df.select(pl.col("struct_col").struct.unnest())
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘
        """
        everything = "*"
        return self.field(everything)

    def rename_fields(self, names: Sequence[str]) -> Expr:
        """
        Rename the fields of the struct.

        Parameters
        ----------
        names
            New names, given in the same order as the struct's fields.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘

        >>> df.unnest("struct_col")
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Rename fields:

        >>> df = df.select(
        ...     pl.col("struct_col").struct.rename_fields(["www", "xxx", "yyy", "zzz"])
        ... )
        >>> df.unnest("struct_col")
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ www ┆ xxx ┆ yyy  ┆ zzz       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Once renamed, the previous field names can no longer be referenced:

        >>> df.select(pl.col("struct_col").struct.field("aaa"))  # doctest: +SKIP
        StructFieldNotFoundError: aaa
        """
        renamed = self._pyexpr.struct_rename_fields(names)
        return wrap_expr(renamed)

    def json_encode(self) -> Expr:
        """
        Convert this struct to a string column with json values.

        Examples
        --------
        >>> pl.DataFrame(
        ...     {"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]}
        ... ).with_columns(pl.col("a").struct.json_encode().alias("encoded"))
        shape: (2, 2)
        ┌──────────────────┬────────────────────────┐
        │ a                ┆ encoded                │
        │ ---              ┆ ---                    │
        │ struct[2]        ┆ str                    │
        ╞══════════════════╪════════════════════════╡
        │ {[1, 2],[45]}    ┆ {"a":[1,2],"b":[45]}   │
        │ {[9, 1, 3],null} ┆ {"a":[9,1,3],"b":null} │
        └──────────────────┴────────────────────────┘
        """
        encoded = self._pyexpr.struct_json_encode()
        return wrap_expr(encoded)

    def with_fields(
        self,
        *exprs: IntoExpr | Iterable[IntoExpr],
        **named_exprs: IntoExpr,
    ) -> Expr:
        """
        Add or overwrite fields of this struct.

        This is similar to `with_columns` on `DataFrame`.

        .. versionadded:: 0.20.27

        Parameters
        ----------
        *exprs
            Field(s) to add, specified as positional arguments.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        **named_exprs
            Additional fields to add, specified as keyword arguments.
            The columns will be renamed to the keyword used.

        See Also
        --------
        field

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "coords": [{"x": 1, "y": 4}, {"x": 4, "y": 9}, {"x": 9, "y": 16}],
        ...         "multiply": [10, 2, 3],
        ...     }
        ... )
        >>> df
        shape: (3, 2)
        ┌───────────┬──────────┐
        │ coords    ┆ multiply │
        │ ---       ┆ ---      │
        │ struct[2] ┆ i64      │
        ╞═══════════╪══════════╡
        │ {1,4}     ┆ 10       │
        │ {4,9}     ┆ 2        │
        │ {9,16}    ┆ 3        │
        └───────────┴──────────┘
        >>> df = df.with_columns(
        ...     pl.col("coords").struct.with_fields(
        ...         pl.field("x").sqrt(),
        ...         y_mul=pl.field("y") * pl.col("multiply"),
        ...     )
        ... )
        >>> df
        shape: (3, 2)
        ┌─────────────┬──────────┐
        │ coords      ┆ multiply │
        │ ---         ┆ ---      │
        │ struct[3]   ┆ i64      │
        ╞═════════════╪══════════╡
        │ {1.0,4,40}  ┆ 10       │
        │ {2.0,9,18}  ┆ 2        │
        │ {3.0,16,48} ┆ 3        │
        └─────────────┴──────────┘
        >>> df.unnest("coords")
        shape: (3, 4)
        ┌─────┬─────┬───────┬──────────┐
        │ x   ┆ y   ┆ y_mul ┆ multiply │
        │ --- ┆ --- ┆ ---   ┆ ---      │
        │ f64 ┆ i64 ┆ i64   ┆ i64      │
        ╞═════╪═════╪═══════╪══════════╡
        │ 1.0 ┆ 4   ┆ 40    ┆ 10       │
        │ 2.0 ┆ 9   ┆ 18    ┆ 2        │
        │ 3.0 ┆ 16  ┆ 48    ┆ 3        │
        └─────┴─────┴───────┴──────────┘
        """
        # The POLARS_AUTO_STRUCTIFY environment variable (an integer flag,
        # off when unset) is forwarded to the expression parser.
        raw_flag = os.environ.get("POLARS_AUTO_STRUCTIFY", 0)
        auto_structify = bool(int(raw_flag))

        parsed = parse_into_list_of_expressions(
            *exprs, **named_exprs, __structify=auto_structify
        )
        updated = self._pyexpr.struct_with_fields(parsed)
        return wrap_expr(updated)
|
polars/expr/whenthen.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
import polars.functions as F
|
|
6
|
+
from polars._utils.parse import (
|
|
7
|
+
parse_into_expression,
|
|
8
|
+
parse_predicates_constraints_into_expression,
|
|
9
|
+
)
|
|
10
|
+
from polars._utils.wrap import wrap_expr
|
|
11
|
+
from polars.expr.expr import Expr
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterable
|
|
15
|
+
|
|
16
|
+
from polars._plr import PyExpr
|
|
17
|
+
from polars._typing import IntoExpr
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class When:
    """
    First stage of a `when-then-otherwise` expression.

    This is the object produced right after `pl.when(...)` is called.

    It only offers `then`, which attaches the statement for the condition
    and moves the chain on to the next stage.
    """

    def __init__(self, when: Any) -> None:
        # Underlying builder object; `then` forwards to its own `then` method.
        self._when = when

    def then(self, statement: IntoExpr) -> Then:
        """
        Attach a statement to the corresponding condition.

        Parameters
        ----------
        statement
            The statement to apply if the corresponding condition is true.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        """
        parsed = parse_into_expression(statement)
        inner = self._when.then(parsed)
        return Then(inner)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Then(Expr):
    """
    Second stage of a `when-then-otherwise` expression.

    This is the object produced after `pl.when(...).then(...)` is called.
    The chain can be extended with `when`, closed with `otherwise`, or used
    directly as an expression (see `_pyexpr`).
    """

    def __init__(self, then: Any) -> None:
        # Underlying builder object that `when` / `otherwise` forward to.
        self._then = then

    @classmethod
    def _from_pyexpr(cls, pyexpr: PyExpr) -> Expr:
        # Results are produced via `wrap_expr`, not by constructing `cls`.
        wrapped = wrap_expr(pyexpr)
        return wrapped

    @property
    def _pyexpr(self) -> PyExpr:  # type: ignore[override]
        # Without an explicit `otherwise`, a null literal closes the chain.
        return self._then.otherwise(F.lit(None)._pyexpr)

    def when(
        self,
        *predicates: IntoExpr | Iterable[IntoExpr],
        **constraints: Any,
    ) -> ChainedWhen:
        """
        Add a condition to the `when-then-otherwise` expression.

        Parameters
        ----------
        predicates
            Condition(s) that must be met in order to apply the subsequent statement.
            Accepts one or more boolean expressions, which are implicitly combined with
            `&`. String input is parsed as a column name.
        constraints
            Apply conditions as `col_name = value` keyword arguments that are treated as
            equality matches, such as `x = 123`. As with the predicates parameter,
            multiple conditions are implicitly combined using `&`.
        """
        combined = parse_predicates_constraints_into_expression(
            *predicates, **constraints
        )
        inner = self._then.when(combined)
        return ChainedWhen(inner)

    def otherwise(self, statement: IntoExpr) -> Expr:
        """
        Define a default for the `when-then-otherwise` expression.

        Parameters
        ----------
        statement
            The statement to apply if all conditions are false.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        """
        fallback = parse_into_expression(statement)
        closed = self._then.otherwise(fallback)
        return wrap_expr(closed)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class ChainedWhen:
    """
    Intermediate stage of a `when-then-otherwise` expression.

    This is the object produced after an additional `when` is called on an
    existing chain.

    It only offers `then`, which attaches the statement for the new condition.
    """

    def __init__(self, chained_when: Any) -> None:
        # Underlying builder object; `then` forwards to its own `then` method.
        self._chained_when = chained_when

    def then(self, statement: IntoExpr) -> ChainedThen:
        """
        Attach a statement to the corresponding condition.

        Parameters
        ----------
        statement
            The statement to apply if the corresponding condition is true.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        """
        parsed = parse_into_expression(statement)
        inner = self._chained_when.then(parsed)
        return ChainedThen(inner)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ChainedThen(Expr):
    """
    Later stage of a `when-then-otherwise` expression.

    This is the object produced after an additional `then` is called on an
    existing chain. The chain can be extended with `when`, closed with
    `otherwise`, or used directly as an expression (see `_pyexpr`).
    """

    def __init__(self, chained_then: Any) -> None:
        # Underlying builder object that `when` / `otherwise` forward to.
        self._chained_then = chained_then

    @classmethod
    def _from_pyexpr(cls, pyexpr: PyExpr) -> Expr:
        # Results are produced via `wrap_expr`, not by constructing `cls`.
        wrapped = wrap_expr(pyexpr)
        return wrapped

    @property
    def _pyexpr(self) -> PyExpr:  # type: ignore[override]
        # Without an explicit `otherwise`, a null literal closes the chain.
        return self._chained_then.otherwise(F.lit(None)._pyexpr)

    def when(
        self,
        *predicates: IntoExpr | Iterable[IntoExpr],
        **constraints: Any,
    ) -> ChainedWhen:
        """
        Add another condition to the `when-then-otherwise` expression.

        Parameters
        ----------
        predicates
            Condition(s) that must be met in order to apply the subsequent statement.
            Accepts one or more boolean expressions, which are implicitly combined with
            `&`. String input is parsed as a column name.
        constraints
            Apply conditions as `col_name = value` keyword arguments that are treated as
            equality matches, such as `x = 123`. As with the predicates parameter,
            multiple conditions are implicitly combined using `&`.
        """
        combined = parse_predicates_constraints_into_expression(
            *predicates, **constraints
        )
        inner = self._chained_then.when(combined)
        return ChainedWhen(inner)

    def otherwise(self, statement: IntoExpr) -> Expr:
        """
        Define a default for the `when-then-otherwise` expression.

        Parameters
        ----------
        statement
            The statement to apply if all conditions are false.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        """
        fallback = parse_into_expression(statement)
        closed = self._chained_then.otherwise(fallback)
        return wrap_expr(closed)
|