polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
|
@@ -0,0 +1,848 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from typing import TYPE_CHECKING, overload
|
|
5
|
+
|
|
6
|
+
from polars import functions as F
|
|
7
|
+
from polars._utils.parse import (
|
|
8
|
+
parse_into_expression,
|
|
9
|
+
parse_into_list_of_expressions,
|
|
10
|
+
)
|
|
11
|
+
from polars._utils.unstable import issue_unstable_warning
|
|
12
|
+
from polars._utils.wrap import wrap_expr
|
|
13
|
+
from polars.datatypes import Date, Struct, Time
|
|
14
|
+
|
|
15
|
+
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
16
|
+
import polars._plr as plr
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Iterable
|
|
21
|
+
from typing import Literal
|
|
22
|
+
|
|
23
|
+
from polars import Expr, Series
|
|
24
|
+
from polars._typing import Ambiguous, IntoExpr, SchemaDict, TimeUnit
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def datetime_(
    year: int | IntoExpr,
    month: int | IntoExpr,
    day: int | IntoExpr,
    hour: int | IntoExpr | None = None,
    minute: int | IntoExpr | None = None,
    second: int | IntoExpr | None = None,
    microsecond: int | IntoExpr | None = None,
    *,
    time_unit: TimeUnit = "us",
    time_zone: str | None = None,
    ambiguous: Ambiguous | Expr = "raise",
) -> Expr:
    """
    Create a Polars literal expression of type Datetime.

    Parameters
    ----------
    year
        Column or literal.
    month
        Column or literal, ranging from 1-12.
    day
        Column or literal, ranging from 1-31.
    hour
        Column or literal, ranging from 0-23.
    minute
        Column or literal, ranging from 0-59.
    second
        Column or literal, ranging from 0-59.
    microsecond
        Column or literal, ranging from 0-999999.
    time_unit : {'us', 'ms', 'ns'}
        Time unit of the resulting expression.
    time_zone
        Time zone of the resulting expression.
    ambiguous
        Determine how to deal with ambiguous datetimes:

        - `'raise'` (default): raise
        - `'earliest'`: use the earliest datetime
        - `'latest'`: use the latest datetime
        - `'null'`: set to null

    Returns
    -------
    Expr
        Expression of data type :class:`Datetime`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "month": [1, 2, 3],
    ...         "day": [4, 5, 6],
    ...         "hour": [12, 13, 14],
    ...         "minute": [15, 30, 45],
    ...     }
    ... )
    >>> df.with_columns(
    ...     pl.datetime(
    ...         2024,
    ...         pl.col("month"),
    ...         pl.col("day"),
    ...         pl.col("hour"),
    ...         pl.col("minute"),
    ...         time_zone="Australia/Sydney",
    ...     )
    ... )
    shape: (3, 5)
    ┌───────┬─────┬──────┬────────┬────────────────────────────────┐
    │ month ┆ day ┆ hour ┆ minute ┆ datetime                       │
    │ ---   ┆ --- ┆ ---  ┆ ---    ┆ ---                            │
    │ i64   ┆ i64 ┆ i64  ┆ i64    ┆ datetime[μs, Australia/Sydney] │
    ╞═══════╪═════╪══════╪════════╪════════════════════════════════╡
    │ 1     ┆ 4   ┆ 12   ┆ 15     ┆ 2024-01-04 12:15:00 AEDT       │
    │ 2     ┆ 5   ┆ 13   ┆ 30     ┆ 2024-02-05 13:30:00 AEDT       │
    │ 3     ┆ 6   ┆ 14   ┆ 45     ┆ 2024-03-06 14:45:00 AEDT       │
    └───────┴─────┴──────┴────────┴────────────────────────────────┘

    We can also use `pl.datetime` for filtering:

    >>> from datetime import datetime
    >>> df = pl.DataFrame(
    ...     {
    ...         "start": [
    ...             datetime(2024, 1, 1, 0, 0, 0),
    ...             datetime(2024, 1, 1, 0, 0, 0),
    ...             datetime(2024, 1, 1, 0, 0, 0),
    ...         ],
    ...         "end": [
    ...             datetime(2024, 5, 1, 20, 15, 10),
    ...             datetime(2024, 7, 1, 21, 25, 20),
    ...             datetime(2024, 9, 1, 22, 35, 30),
    ...         ],
    ...     }
    ... )
    >>> df.filter(pl.col("end") > pl.datetime(2024, 6, 1))
    shape: (2, 2)
    ┌─────────────────────┬─────────────────────┐
    │ start               ┆ end                 │
    │ ---                 ┆ ---                 │
    │ datetime[μs]        ┆ datetime[μs]        │
    ╞═════════════════════╪═════════════════════╡
    │ 2024-01-01 00:00:00 ┆ 2024-07-01 21:25:20 │
    │ 2024-01-01 00:00:00 ┆ 2024-09-01 22:35:30 │
    └─────────────────────┴─────────────────────┘
    """
    # 'ambiguous' accepts plain strings such as "raise" as literal values,
    # not column names, hence str_as_lit=True.
    ambiguous_expr = parse_into_expression(ambiguous, str_as_lit=True)

    # Date components are mandatory; time-of-day components may be omitted
    # and are forwarded as None when absent.
    date_parts = [parse_into_expression(part) for part in (year, month, day)]
    time_parts = [
        parse_into_expression(part) if part is not None else None
        for part in (hour, minute, second, microsecond)
    ]

    return wrap_expr(
        plr.datetime(
            *date_parts,
            *time_parts,
            time_unit,
            time_zone,
            ambiguous_expr,
        )
    )
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def date_(
    year: Expr | str | int,
    month: Expr | str | int,
    day: Expr | str | int,
) -> Expr:
    """
    Create a Polars literal expression of type Date.

    Parameters
    ----------
    year
        column or literal.
    month
        column or literal, ranging from 1-12.
    day
        column or literal, ranging from 1-31.

    Returns
    -------
    Expr
        Expression of data type :class:`Date`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "month": [1, 2, 3],
    ...         "day": [4, 5, 6],
    ...     }
    ... )
    >>> df.with_columns(pl.date(2024, pl.col("month"), pl.col("day")))
    shape: (3, 3)
    ┌───────┬─────┬────────────┐
    │ month ┆ day ┆ date       │
    │ ---   ┆ --- ┆ ---        │
    │ i64   ┆ i64 ┆ date       │
    ╞═══════╪═════╪════════════╡
    │ 1     ┆ 4   ┆ 2024-01-04 │
    │ 2     ┆ 5   ┆ 2024-02-05 │
    │ 3     ┆ 6   ┆ 2024-03-06 │
    └───────┴─────┴────────────┘

    We can also use `pl.date` for filtering:

    >>> from datetime import date
    >>> df = pl.DataFrame(
    ...     {
    ...         "start": [date(2024, 1, 1), date(2024, 1, 1), date(2024, 1, 1)],
    ...         "end": [date(2024, 5, 1), date(2024, 7, 1), date(2024, 9, 1)],
    ...     }
    ... )
    >>> df.filter(pl.col("end") > pl.date(2024, 6, 1))
    shape: (2, 2)
    ┌────────────┬────────────┐
    │ start      ┆ end        │
    │ ---        ┆ ---        │
    │ date       ┆ date       │
    ╞════════════╪════════════╡
    │ 2024-01-01 ┆ 2024-07-01 │
    │ 2024-01-01 ┆ 2024-09-01 │
    └────────────┴────────────┘
    """
    # A Date is a Datetime at midnight; build the datetime expression and
    # truncate it by casting down to Date.
    midnight = datetime_(year, month, day)
    return midnight.cast(Date).alias("date")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def time_(
    hour: Expr | str | int | None = None,
    minute: Expr | str | int | None = None,
    second: Expr | str | int | None = None,
    microsecond: Expr | str | int | None = None,
) -> Expr:
    """
    Create a Polars literal expression of type Time.

    Parameters
    ----------
    hour
        column or literal, ranging from 0-23.
    minute
        column or literal, ranging from 0-59.
    second
        column or literal, ranging from 0-59.
    microsecond
        column or literal, ranging from 0-999999.

    Returns
    -------
    Expr
        Expression of data type :class:`Time`.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "hour": [12, 13, 14],
    ...         "minute": [15, 30, 45],
    ...     }
    ... )

    >>> df.with_columns(pl.time(pl.col("hour"), pl.col("minute")))
    shape: (3, 3)
    ┌──────┬────────┬──────────┐
    │ hour ┆ minute ┆ time     │
    │ ---  ┆ ---    ┆ ---      │
    │ i64  ┆ i64    ┆ time     │
    ╞══════╪════════╪══════════╡
    │ 12   ┆ 15     ┆ 12:15:00 │
    │ 13   ┆ 30     ┆ 13:30:00 │
    │ 14   ┆ 45     ┆ 14:45:00 │
    └──────┴────────┴──────────┘
    """
    # Anchor the time-of-day components to the Unix epoch date, then cast the
    # resulting Datetime down to Time, discarding the (dummy) date part.
    epoch_start = (1970, 1, 1)
    return (
        datetime_(*epoch_start, hour, minute, second, microsecond)
        .cast(Time)
        .alias("time")
    )
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def duration(
    *,
    weeks: Expr | str | int | float | None = None,
    days: Expr | str | int | float | None = None,
    hours: Expr | str | int | float | None = None,
    minutes: Expr | str | int | float | None = None,
    seconds: Expr | str | int | float | None = None,
    milliseconds: Expr | str | int | float | None = None,
    microseconds: Expr | str | int | float | None = None,
    nanoseconds: Expr | str | int | float | None = None,
    time_unit: TimeUnit | None = None,
) -> Expr:
    """
    Create polars `Duration` from distinct time components.

    Parameters
    ----------
    weeks
        Number of weeks.
    days
        Number of days.
    hours
        Number of hours.
    minutes
        Number of minutes.
    seconds
        Number of seconds.
    milliseconds
        Number of milliseconds.
    microseconds
        Number of microseconds.
    nanoseconds
        Number of nanoseconds.
    time_unit : {None, 'us', 'ms', 'ns'}
        Time unit of the resulting expression. If set to `None` (default), the time
        unit will be inferred from the other inputs: `'ns'` if `nanoseconds` was
        specified, `'us'` otherwise.

    Returns
    -------
    Expr
        Expression of data type :class:`Duration`.

    Notes
    -----
    A `duration` represents a fixed amount of time. For example,
    `pl.duration(days=1)` means "exactly 24 hours". By contrast,
    `Expr.dt.offset_by('1d')` means "1 calendar day", which could sometimes be
    23 hours or 25 hours depending on Daylight Savings Time.
    For non-fixed durations such as "calendar month" or "calendar day",
    please use :meth:`polars.Expr.dt.offset_by` instead.

    Examples
    --------
    >>> from datetime import datetime
    >>> df = pl.DataFrame(
    ...     {
    ...         "dt": [datetime(2022, 1, 1), datetime(2022, 1, 2)],
    ...         "add": [1, 2],
    ...     }
    ... )
    >>> df
    shape: (2, 2)
    ┌─────────────────────┬─────┐
    │ dt                  ┆ add │
    │ ---                 ┆ --- │
    │ datetime[μs]        ┆ i64 │
    ╞═════════════════════╪═════╡
    │ 2022-01-01 00:00:00 ┆ 1   │
    │ 2022-01-02 00:00:00 ┆ 2   │
    └─────────────────────┴─────┘
    >>> with pl.Config(tbl_width_chars=120):
    ...     df.select(
    ...         (pl.col("dt") + pl.duration(weeks="add")).alias("add_weeks"),
    ...         (pl.col("dt") + pl.duration(days="add")).alias("add_days"),
    ...         (pl.col("dt") + pl.duration(seconds="add")).alias("add_seconds"),
    ...         (pl.col("dt") + pl.duration(milliseconds="add")).alias("add_millis"),
    ...         (pl.col("dt") + pl.duration(hours="add")).alias("add_hours"),
    ...     )
    shape: (2, 5)
    ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
    │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_millis              ┆ add_hours           │
    │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
    │ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]            ┆ datetime[μs]        │
    ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
    │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
    │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
    └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

    If you need to add non-fixed durations, you should use :meth:`polars.Expr.dt.offset_by` instead:

    >>> with pl.Config(tbl_width_chars=120):
    ...     df.select(
    ...         add_calendar_days=pl.col("dt").dt.offset_by(
    ...             pl.format("{}d", pl.col("add"))
    ...         ),
    ...         add_calendar_months=pl.col("dt").dt.offset_by(
    ...             pl.format("{}mo", pl.col("add"))
    ...         ),
    ...         add_calendar_years=pl.col("dt").dt.offset_by(
    ...             pl.format("{}y", pl.col("add"))
    ...         ),
    ...     )
    shape: (2, 3)
    ┌─────────────────────┬─────────────────────┬─────────────────────┐
    │ add_calendar_days   ┆ add_calendar_months ┆ add_calendar_years  │
    │ ---                 ┆ ---                 ┆ ---                 │
    │ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        │
    ╞═════════════════════╪═════════════════════╪═════════════════════╡
    │ 2022-01-02 00:00:00 ┆ 2022-02-01 00:00:00 ┆ 2023-01-01 00:00:00 │
    │ 2022-01-04 00:00:00 ┆ 2022-03-02 00:00:00 ┆ 2024-01-02 00:00:00 │
    └─────────────────────┴─────────────────────┴─────────────────────┘
    """  # noqa: W505
    if time_unit is None:
        # Infer the unit: nanosecond input needs 'ns' precision to be exact;
        # everything else defaults to 'us'.
        time_unit = "ns" if nanoseconds is not None else "us"

    # Parse each provided component into an expression; absent components are
    # forwarded as None. Order matches the plr.duration signature.
    components = tuple(
        parse_into_expression(value) if value is not None else None
        for value in (
            weeks,
            days,
            hours,
            minutes,
            seconds,
            milliseconds,
            microseconds,
            nanoseconds,
        )
    )

    return wrap_expr(plr.duration(*components, time_unit))
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    """
    Horizontally concatenate columns into a single list column.

    Operates in linear time.

    Parameters
    ----------
    exprs
        Columns to concatenate into a single list column. Accepts expression input.
        Strings are parsed as column names, other non-expression inputs are parsed as
        literals.
    *more_exprs
        Additional columns to concatenate into a single list column, specified as
        positional arguments.

    Examples
    --------
    Concatenate two existing list columns. Null values are propagated.

    >>> df = pl.DataFrame({"a": [[1, 2], [3], [4, 5]], "b": [[4], [], None]})
    >>> df.with_columns(concat_list=pl.concat_list("a", "b"))
    shape: (3, 3)
    ┌───────────┬───────────┬─────────────┐
    │ a         ┆ b         ┆ concat_list │
    │ ---       ┆ ---       ┆ ---         │
    │ list[i64] ┆ list[i64] ┆ list[i64]   │
    ╞═══════════╪═══════════╪═════════════╡
    │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
    │ [3]       ┆ []        ┆ [3]         │
    │ [4, 5]    ┆ null      ┆ null        │
    └───────────┴───────────┴─────────────┘

    Non-list columns are cast to a list before concatenation. The output data type
    is the supertype of the concatenated columns.

    >>> df.select("a", concat_list=pl.concat_list("a", pl.lit("x")))
    shape: (3, 2)
    ┌───────────┬─────────────────┐
    │ a         ┆ concat_list     │
    │ ---       ┆ ---             │
    │ list[i64] ┆ list[str]       │
    ╞═══════════╪═════════════════╡
    │ [1, 2]    ┆ ["1", "2", "x"] │
    │ [3]       ┆ ["3", "x"]      │
    │ [4, 5]    ┆ ["4", "5", "x"] │
    └───────────┴─────────────────┘

    Create lagged columns and collect them into a list. This mimics a rolling window.

    >>> df = pl.DataFrame({"A": [1.0, 2.0, 9.0, 2.0, 13.0]})
    >>> df = df.select([pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)])
    >>> df.select(
    ...     pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling")
    ... )
    shape: (5, 1)
    ┌───────────────────┐
    │ A_rolling         │
    │ ---               │
    │ list[f64]         │
    ╞═══════════════════╡
    │ [null, null, 1.0] │
    │ [null, 1.0, 2.0]  │
    │ [1.0, 2.0, 9.0]   │
    │ [2.0, 9.0, 2.0]   │
    │ [9.0, 2.0, 13.0]  │
    └───────────────────┘
    """
    # Normalize all positional inputs into a flat list of expressions before
    # delegating to the native implementation.
    parsed = parse_into_list_of_expressions(exprs, *more_exprs)
    return wrap_expr(plr.concat_list(parsed))
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def concat_arr(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    """
    Horizontally concatenate columns into a single array column.

    Non-array columns are reshaped to a unit-width array. All columns must have
    a dtype of either `pl.Array(<DataType>, width)` or `pl.<DataType>`.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    exprs
        Columns to concatenate into a single array column. Accepts expression input.
        Strings are parsed as column names, other non-expression inputs are parsed as
        literals.
    *more_exprs
        Additional columns to concatenate into a single array column, specified as
        positional arguments.

    Examples
    --------
    Concatenate 2 array columns:

    >>> (
    ...     pl.select(
    ...         a=pl.Series([[1], [3], None], dtype=pl.Array(pl.Int64, 1)),
    ...         b=pl.Series([[3], [None], [5]], dtype=pl.Array(pl.Int64, 1)),
    ...     ).with_columns(
    ...         pl.concat_arr("a", "b").alias("concat_arr(a, b)"),
    ...         pl.concat_arr("a", pl.first("b")).alias("concat_arr(a, first(b))"),
    ...     )
    ... )
    shape: (3, 4)
    ┌───────────────┬───────────────┬──────────────────┬─────────────────────────┐
    │ a             ┆ b             ┆ concat_arr(a, b) ┆ concat_arr(a, first(b)) │
    │ ---           ┆ ---           ┆ ---              ┆ ---                     │
    │ array[i64, 1] ┆ array[i64, 1] ┆ array[i64, 2]    ┆ array[i64, 2]           │
    ╞═══════════════╪═══════════════╪══════════════════╪═════════════════════════╡
    │ [1]           ┆ [3]           ┆ [1, 3]           ┆ [1, 3]                  │
    │ [3]           ┆ [null]        ┆ [3, null]        ┆ [3, 3]                  │
    │ null          ┆ [5]           ┆ null             ┆ null                    │
    └───────────────┴───────────────┴──────────────────┴─────────────────────────┘

    Concatenate non-array columns:

    >>> (
    ...     pl.select(
    ...         c=pl.Series([None, 5, 6], dtype=pl.Int64),
    ...     )
    ...     .with_columns(d=pl.col("c").reverse())
    ...     .with_columns(
    ...         pl.concat_arr("c", "d").alias("concat_arr(c, d)"),
    ...     )
    ... )
    shape: (3, 3)
    ┌──────┬──────┬──────────────────┐
    │ c    ┆ d    ┆ concat_arr(c, d) │
    │ ---  ┆ ---  ┆ ---              │
    │ i64  ┆ i64  ┆ array[i64, 2]    │
    ╞══════╪══════╪══════════════════╡
    │ null ┆ 6    ┆ [null, 6]        │
    │ 5    ┆ 5    ┆ [5, 5]           │
    │ 6    ┆ null ┆ [6, null]        │
    └──────┴──────┴──────────────────┘

    Concatenate mixed array and non-array columns:

    >>> (
    ...     pl.select(
    ...         a=pl.Series([[1], [3], None], dtype=pl.Array(pl.Int64, 1)),
    ...         b=pl.Series([[3], [None], [5]], dtype=pl.Array(pl.Int64, 1)),
    ...         c=pl.Series([None, 5, 6], dtype=pl.Int64),
    ...     ).with_columns(
    ...         pl.concat_arr("a", "b", "c").alias("concat_arr(a, b, c)"),
    ...     )
    ... )
    shape: (3, 4)
    ┌───────────────┬───────────────┬──────┬─────────────────────┐
    │ a             ┆ b             ┆ c    ┆ concat_arr(a, b, c) │
    │ ---           ┆ ---           ┆ ---  ┆ ---                 │
    │ array[i64, 1] ┆ array[i64, 1] ┆ i64  ┆ array[i64, 3]       │
    ╞═══════════════╪═══════════════╪══════╪═════════════════════╡
    │ [1]           ┆ [3]           ┆ null ┆ [1, 3, null]        │
    │ [3]           ┆ [null]        ┆ 5    ┆ [3, null, 5]        │
    │ null          ┆ [5]           ┆ 6    ┆ null                │
    └───────────────┴───────────────┴──────┴─────────────────────┘

    Unit-length columns are broadcasted:

    >>> (
    ...     pl.select(
    ...         a=pl.Series([1, 3, None]),
    ...     ).with_columns(
    ...         pl.concat_arr("a", pl.lit(0, dtype=pl.Int64)).alias("concat_arr(a, 0)"),
    ...         pl.concat_arr("a", pl.sum("a")).alias("concat_arr(a, sum(a))"),
    ...         pl.concat_arr("a", pl.max("a")).alias("concat_arr(a, max(a))"),
    ...     )
    ... )
    shape: (3, 4)
    ┌──────┬──────────────────┬───────────────────────┬───────────────────────┐
    │ a    ┆ concat_arr(a, 0) ┆ concat_arr(a, sum(a)) ┆ concat_arr(a, max(a)) │
    │ ---  ┆ ---              ┆ ---                   ┆ ---                   │
    │ i64  ┆ array[i64, 2]    ┆ array[i64, 2]         ┆ array[i64, 2]         │
    ╞══════╪══════════════════╪═══════════════════════╪═══════════════════════╡
    │ 1    ┆ [1, 0]           ┆ [1, 4]                ┆ [1, 3]                │
    │ 3    ┆ [3, 0]           ┆ [3, 4]                ┆ [3, 3]                │
    │ null ┆ [null, 0]        ┆ [null, 4]             ┆ [null, 3]             │
    └──────┴──────────────────┴───────────────────────┴───────────────────────┘
    """
    # Warn on every call: this API is not yet stabilized.
    msg = "`concat_arr` functionality is considered unstable"
    issue_unstable_warning(msg)

    # Normalize all positional inputs into a flat list of expressions before
    # delegating to the native implementation.
    parsed = parse_into_list_of_expressions(exprs, *more_exprs)
    return wrap_expr(plr.concat_arr(parsed))
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
# Typing overloads for `struct`: the `eager` flag narrows the return type.
# `eager=False` (the default) yields an `Expr`, `eager=True` yields a `Series`,
# and a plain `bool` keeps the `Expr | Series` union.
@overload
def struct(
    *exprs: IntoExpr | Iterable[IntoExpr],
    schema: SchemaDict | None = ...,
    eager: Literal[False] = ...,
    **named_exprs: IntoExpr,
) -> Expr: ...


@overload
def struct(
    *exprs: IntoExpr | Iterable[IntoExpr],
    schema: SchemaDict | None = ...,
    eager: Literal[True],
    **named_exprs: IntoExpr,
) -> Series: ...


@overload
def struct(
    *exprs: IntoExpr | Iterable[IntoExpr],
    schema: SchemaDict | None = ...,
    eager: bool,
    **named_exprs: IntoExpr,
) -> Expr | Series: ...
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def struct(
    *exprs: IntoExpr | Iterable[IntoExpr],
    schema: SchemaDict | None = None,
    eager: bool = False,
    **named_exprs: IntoExpr,
) -> Expr | Series:
    """
    Collect columns into a struct column.

    Parameters
    ----------
    *exprs
        Column(s) to collect into a struct column, specified as positional arguments.
        Accepts expression input. Strings are parsed as column names,
        other non-expression inputs are parsed as literals.
    schema
        Optional schema that explicitly defines the struct field dtypes. If no columns
        or expressions are provided, schema keys are used to define columns.
    eager
        Evaluate immediately and return a `Series`. If set to `False` (default),
        return an expression instead.
    **named_exprs
        Additional columns to collect into the struct column, specified as keyword
        arguments. The columns will be renamed to the keyword used.

    Examples
    --------
    Collect all columns of a dataframe into a struct by passing `pl.all()`.

    >>> df = pl.DataFrame(
    ...     {
    ...         "int": [1, 2],
    ...         "str": ["a", "b"],
    ...         "bool": [True, None],
    ...         "list": [[1, 2], [3]],
    ...     }
    ... )
    >>> df.select(pl.struct(pl.all()).alias("my_struct"))
    shape: (2, 1)
    ┌─────────────────────┐
    │ my_struct           │
    │ ---                 │
    │ struct[4]           │
    ╞═════════════════════╡
    │ {1,"a",true,[1, 2]} │
    │ {2,"b",null,[3]}    │
    └─────────────────────┘

    Collect selected columns into a struct by either passing a list of columns, or by
    specifying each column as a positional argument.

    >>> df.select(pl.struct("int", False).alias("my_struct"))
    shape: (2, 1)
    ┌───────────┐
    │ my_struct │
    │ ---       │
    │ struct[2] │
    ╞═══════════╡
    │ {1,false} │
    │ {2,false} │
    └───────────┘

    Use keyword arguments to easily name each struct field.

    >>> df.select(pl.struct(p="int", q="bool").alias("my_struct")).schema
    Schema({'my_struct': Struct({'p': Int64, 'q': Boolean})})
    """
    field_exprs = parse_into_list_of_expressions(*exprs, **named_exprs)

    if not schema:
        expr = wrap_expr(plr.as_struct(field_exprs))
    else:
        if exprs or named_exprs:
            expr = wrap_expr(plr.as_struct(field_exprs))
        else:
            # No columns or expressions given: derive the struct fields
            # directly from the schema keys.
            key_exprs = parse_into_list_of_expressions(list(schema.keys()))
            expr = wrap_expr(plr.as_struct(key_exprs))
        # Non-strict cast so each field is coerced to the requested dtype.
        expr = expr.cast(Struct(schema), strict=False)

    return F.select(expr).to_series() if eager else expr
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def concat_str(
    exprs: IntoExpr | Iterable[IntoExpr],
    *more_exprs: IntoExpr,
    separator: str = "",
    ignore_nulls: bool = False,
) -> Expr:
    """
    Horizontally concatenate columns into a single string column.

    Operates in linear time.

    Parameters
    ----------
    exprs
        Columns to concatenate into a single string column. Accepts expression input.
        Strings are parsed as column names, other non-expression inputs are parsed as
        literals. Non-`String` columns are cast to `String`.
    *more_exprs
        Additional columns to concatenate into a single string column, specified as
        positional arguments.
    separator
        String that will be used to separate the values of each column.
    ignore_nulls
        Ignore null values (default is ``False``).

        If set to ``False``, null values are propagated: any row that contains
        a null value produces a null output.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 3],
    ...         "b": ["dogs", "cats", None],
    ...         "c": ["play", "swim", "walk"],
    ...     }
    ... )
    >>> df.with_columns(
    ...     pl.concat_str(
    ...         [
    ...             pl.col("a") * 2,
    ...             pl.col("b"),
    ...             pl.col("c"),
    ...         ],
    ...         separator=" ",
    ...     ).alias("full_sentence"),
    ... )
    shape: (3, 4)
    ┌─────┬──────┬──────┬───────────────┐
    │ a   ┆ b    ┆ c    ┆ full_sentence │
    │ --- ┆ ---  ┆ ---  ┆ ---           │
    │ i64 ┆ str  ┆ str  ┆ str           │
    ╞═════╪══════╪══════╪═══════════════╡
    │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
    │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
    │ 3   ┆ null ┆ walk ┆ null          │
    └─────┴──────┴──────┴───────────────┘
    """
    parsed_exprs = parse_into_list_of_expressions(exprs, *more_exprs)
    return wrap_expr(plr.concat_str(parsed_exprs, separator, ignore_nulls))
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def format(f_string: str, *args: Expr | str) -> Expr:
    """
    Format expressions as a string.

    Parameters
    ----------
    f_string
        A string with "{}" placeholders.
        For example: "hello_{}" or "{}_world".
    args
        Expression(s) that fill the placeholders.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": ["a", "b", "c"],
    ...         "b": [1, 2, 3],
    ...     }
    ... )
    >>> df.select(
    ...     [
    ...         pl.format("foo_{}_bar_{}", pl.col("a"), "b").alias("fmt"),
    ...     ]
    ... )
    shape: (3, 1)
    ┌─────────────┐
    │ fmt         │
    │ ---         │
    │ str         │
    ╞═════════════╡
    │ foo_a_bar_1 │
    │ foo_b_bar_2 │
    │ foo_c_bar_3 │
    └─────────────┘
    """
    # Splitting on "{}" yields one more segment than there are placeholders.
    segments = f_string.split("{}")
    if len(segments) - 1 != len(args):
        msg = "number of placeholders should equal the number of arguments"
        raise ValueError(msg)

    # Interleave literal segments with the placeholder expressions,
    # skipping empty literals (e.g. adjacent or leading/trailing "{}").
    pieces = []
    if segments[0]:
        pieces.append(F.lit(segments[0]))
    for arg, segment in zip(args, segments[1:]):
        pieces.append(wrap_expr(parse_into_expression(arg)))
        if segment:
            pieces.append(F.lit(segment))

    return concat_str(pieces, separator="")
|