polars-runtime-compat 1.34.0b2__cp39-abi3-win_arm64.whl → 1.34.0b4__cp39-abi3-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -96
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/expr/binary.py
DELETED
|
@@ -1,346 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
from polars._utils.parse import parse_into_expression
|
|
6
|
-
from polars._utils.various import scale_bytes
|
|
7
|
-
from polars._utils.wrap import wrap_expr
|
|
8
|
-
from polars.datatypes import parse_into_datatype_expr
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
from polars import DataTypeExpr, Expr
|
|
12
|
-
from polars._typing import (
|
|
13
|
-
Endianness,
|
|
14
|
-
IntoExpr,
|
|
15
|
-
PolarsDataType,
|
|
16
|
-
SizeUnit,
|
|
17
|
-
TransferEncoding,
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ExprBinaryNameSpace:
|
|
22
|
-
"""Namespace for bin related expressions."""
|
|
23
|
-
|
|
24
|
-
_accessor = "bin"
|
|
25
|
-
|
|
26
|
-
def __init__(self, expr: Expr) -> None:
|
|
27
|
-
self._pyexpr = expr._pyexpr
|
|
28
|
-
|
|
29
|
-
def contains(self, literal: IntoExpr) -> Expr:
|
|
30
|
-
r"""
|
|
31
|
-
Check if binaries in Series contain a binary substring.
|
|
32
|
-
|
|
33
|
-
Parameters
|
|
34
|
-
----------
|
|
35
|
-
literal
|
|
36
|
-
The binary substring to look for
|
|
37
|
-
|
|
38
|
-
Returns
|
|
39
|
-
-------
|
|
40
|
-
Expr
|
|
41
|
-
Expression of data type :class:`Boolean`.
|
|
42
|
-
|
|
43
|
-
See Also
|
|
44
|
-
--------
|
|
45
|
-
starts_with : Check if the binary substring exists at the start
|
|
46
|
-
ends_with : Check if the binary substring exists at the end
|
|
47
|
-
|
|
48
|
-
Examples
|
|
49
|
-
--------
|
|
50
|
-
>>> colors = pl.DataFrame(
|
|
51
|
-
... {
|
|
52
|
-
... "name": ["black", "yellow", "blue"],
|
|
53
|
-
... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
|
|
54
|
-
... "lit": [b"\x00", b"\xff\x00", b"\xff\xff"],
|
|
55
|
-
... }
|
|
56
|
-
... )
|
|
57
|
-
>>> colors.select(
|
|
58
|
-
... "name",
|
|
59
|
-
... pl.col("code").bin.contains(b"\xff").alias("contains_with_lit"),
|
|
60
|
-
... pl.col("code").bin.contains(pl.col("lit")).alias("contains_with_expr"),
|
|
61
|
-
... )
|
|
62
|
-
shape: (3, 3)
|
|
63
|
-
┌────────┬───────────────────┬────────────────────┐
|
|
64
|
-
│ name ┆ contains_with_lit ┆ contains_with_expr │
|
|
65
|
-
│ --- ┆ --- ┆ --- │
|
|
66
|
-
│ str ┆ bool ┆ bool │
|
|
67
|
-
╞════════╪═══════════════════╪════════════════════╡
|
|
68
|
-
│ black ┆ false ┆ true │
|
|
69
|
-
│ yellow ┆ true ┆ true │
|
|
70
|
-
│ blue ┆ true ┆ false │
|
|
71
|
-
└────────┴───────────────────┴────────────────────┘
|
|
72
|
-
"""
|
|
73
|
-
literal_pyexpr = parse_into_expression(literal, str_as_lit=True)
|
|
74
|
-
return wrap_expr(self._pyexpr.bin_contains(literal_pyexpr))
|
|
75
|
-
|
|
76
|
-
def ends_with(self, suffix: IntoExpr) -> Expr:
|
|
77
|
-
r"""
|
|
78
|
-
Check if string values end with a binary substring.
|
|
79
|
-
|
|
80
|
-
Parameters
|
|
81
|
-
----------
|
|
82
|
-
suffix
|
|
83
|
-
Suffix substring.
|
|
84
|
-
|
|
85
|
-
Returns
|
|
86
|
-
-------
|
|
87
|
-
Expr
|
|
88
|
-
Expression of data type :class:`Boolean`.
|
|
89
|
-
|
|
90
|
-
See Also
|
|
91
|
-
--------
|
|
92
|
-
starts_with : Check if the binary substring exists at the start
|
|
93
|
-
contains : Check if the binary substring exists anywhere
|
|
94
|
-
|
|
95
|
-
Examples
|
|
96
|
-
--------
|
|
97
|
-
>>> colors = pl.DataFrame(
|
|
98
|
-
... {
|
|
99
|
-
... "name": ["black", "yellow", "blue"],
|
|
100
|
-
... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
|
|
101
|
-
... "suffix": [b"\x00", b"\xff\x00", b"\x00\x00"],
|
|
102
|
-
... }
|
|
103
|
-
... )
|
|
104
|
-
>>> colors.select(
|
|
105
|
-
... "name",
|
|
106
|
-
... pl.col("code").bin.ends_with(b"\xff").alias("ends_with_lit"),
|
|
107
|
-
... pl.col("code").bin.ends_with(pl.col("suffix")).alias("ends_with_expr"),
|
|
108
|
-
... )
|
|
109
|
-
shape: (3, 3)
|
|
110
|
-
┌────────┬───────────────┬────────────────┐
|
|
111
|
-
│ name ┆ ends_with_lit ┆ ends_with_expr │
|
|
112
|
-
│ --- ┆ --- ┆ --- │
|
|
113
|
-
│ str ┆ bool ┆ bool │
|
|
114
|
-
╞════════╪═══════════════╪════════════════╡
|
|
115
|
-
│ black ┆ false ┆ true │
|
|
116
|
-
│ yellow ┆ false ┆ true │
|
|
117
|
-
│ blue ┆ true ┆ false │
|
|
118
|
-
└────────┴───────────────┴────────────────┘
|
|
119
|
-
"""
|
|
120
|
-
suffix_pyexpr = parse_into_expression(suffix, str_as_lit=True)
|
|
121
|
-
return wrap_expr(self._pyexpr.bin_ends_with(suffix_pyexpr))
|
|
122
|
-
|
|
123
|
-
def starts_with(self, prefix: IntoExpr) -> Expr:
|
|
124
|
-
r"""
|
|
125
|
-
Check if values start with a binary substring.
|
|
126
|
-
|
|
127
|
-
Parameters
|
|
128
|
-
----------
|
|
129
|
-
prefix
|
|
130
|
-
Prefix substring.
|
|
131
|
-
|
|
132
|
-
Returns
|
|
133
|
-
-------
|
|
134
|
-
Expr
|
|
135
|
-
Expression of data type :class:`Boolean`.
|
|
136
|
-
|
|
137
|
-
See Also
|
|
138
|
-
--------
|
|
139
|
-
ends_with : Check if the binary substring exists at the end
|
|
140
|
-
contains : Check if the binary substring exists anywhere
|
|
141
|
-
|
|
142
|
-
Examples
|
|
143
|
-
--------
|
|
144
|
-
>>> colors = pl.DataFrame(
|
|
145
|
-
... {
|
|
146
|
-
... "name": ["black", "yellow", "blue"],
|
|
147
|
-
... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
|
|
148
|
-
... "prefix": [b"\x00", b"\xff\x00", b"\x00\x00"],
|
|
149
|
-
... }
|
|
150
|
-
... )
|
|
151
|
-
>>> colors.select(
|
|
152
|
-
... "name",
|
|
153
|
-
... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_lit"),
|
|
154
|
-
... pl.col("code")
|
|
155
|
-
... .bin.starts_with(pl.col("prefix"))
|
|
156
|
-
... .alias("starts_with_expr"),
|
|
157
|
-
... )
|
|
158
|
-
shape: (3, 3)
|
|
159
|
-
┌────────┬─────────────────┬──────────────────┐
|
|
160
|
-
│ name ┆ starts_with_lit ┆ starts_with_expr │
|
|
161
|
-
│ --- ┆ --- ┆ --- │
|
|
162
|
-
│ str ┆ bool ┆ bool │
|
|
163
|
-
╞════════╪═════════════════╪══════════════════╡
|
|
164
|
-
│ black ┆ false ┆ true │
|
|
165
|
-
│ yellow ┆ true ┆ false │
|
|
166
|
-
│ blue ┆ false ┆ true │
|
|
167
|
-
└────────┴─────────────────┴──────────────────┘
|
|
168
|
-
"""
|
|
169
|
-
prefix_pyexpr = parse_into_expression(prefix, str_as_lit=True)
|
|
170
|
-
return wrap_expr(self._pyexpr.bin_starts_with(prefix_pyexpr))
|
|
171
|
-
|
|
172
|
-
def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr:
|
|
173
|
-
r"""
|
|
174
|
-
Decode values using the provided encoding.
|
|
175
|
-
|
|
176
|
-
Parameters
|
|
177
|
-
----------
|
|
178
|
-
encoding : {'hex', 'base64'}
|
|
179
|
-
The encoding to use.
|
|
180
|
-
strict
|
|
181
|
-
Raise an error if the underlying value cannot be decoded,
|
|
182
|
-
otherwise mask out with a null value.
|
|
183
|
-
|
|
184
|
-
Returns
|
|
185
|
-
-------
|
|
186
|
-
Expr
|
|
187
|
-
Expression of data type :class:`Binary`.
|
|
188
|
-
|
|
189
|
-
Examples
|
|
190
|
-
--------
|
|
191
|
-
>>> colors = pl.DataFrame(
|
|
192
|
-
... {
|
|
193
|
-
... "name": ["black", "yellow", "blue"],
|
|
194
|
-
... "encoded": [b"000000", b"ffff00", b"0000ff"],
|
|
195
|
-
... }
|
|
196
|
-
... )
|
|
197
|
-
>>> colors.with_columns(
|
|
198
|
-
... pl.col("encoded").bin.decode("hex").alias("code"),
|
|
199
|
-
... )
|
|
200
|
-
shape: (3, 3)
|
|
201
|
-
┌────────┬───────────┬─────────────────┐
|
|
202
|
-
│ name ┆ encoded ┆ code │
|
|
203
|
-
│ --- ┆ --- ┆ --- │
|
|
204
|
-
│ str ┆ binary ┆ binary │
|
|
205
|
-
╞════════╪═══════════╪═════════════════╡
|
|
206
|
-
│ black ┆ b"000000" ┆ b"\x00\x00\x00" │
|
|
207
|
-
│ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
|
|
208
|
-
│ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
|
|
209
|
-
└────────┴───────────┴─────────────────┘
|
|
210
|
-
"""
|
|
211
|
-
if encoding == "hex":
|
|
212
|
-
return wrap_expr(self._pyexpr.bin_hex_decode(strict))
|
|
213
|
-
elif encoding == "base64":
|
|
214
|
-
return wrap_expr(self._pyexpr.bin_base64_decode(strict))
|
|
215
|
-
else:
|
|
216
|
-
msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
|
|
217
|
-
raise ValueError(msg)
|
|
218
|
-
|
|
219
|
-
def encode(self, encoding: TransferEncoding) -> Expr:
|
|
220
|
-
r"""
|
|
221
|
-
Encode a value using the provided encoding.
|
|
222
|
-
|
|
223
|
-
Parameters
|
|
224
|
-
----------
|
|
225
|
-
encoding : {'hex', 'base64'}
|
|
226
|
-
The encoding to use.
|
|
227
|
-
|
|
228
|
-
Returns
|
|
229
|
-
-------
|
|
230
|
-
Expr
|
|
231
|
-
Expression of data type :class:`Binary`.
|
|
232
|
-
|
|
233
|
-
Examples
|
|
234
|
-
--------
|
|
235
|
-
>>> colors = pl.DataFrame(
|
|
236
|
-
... {
|
|
237
|
-
... "color": ["black", "yellow", "blue"],
|
|
238
|
-
... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
|
|
239
|
-
... }
|
|
240
|
-
... )
|
|
241
|
-
>>> colors.with_columns(
|
|
242
|
-
... pl.col("code").bin.encode("hex").alias("encoded"),
|
|
243
|
-
... )
|
|
244
|
-
shape: (3, 3)
|
|
245
|
-
┌────────┬─────────────────┬─────────┐
|
|
246
|
-
│ color ┆ code ┆ encoded │
|
|
247
|
-
│ --- ┆ --- ┆ --- │
|
|
248
|
-
│ str ┆ binary ┆ str │
|
|
249
|
-
╞════════╪═════════════════╪═════════╡
|
|
250
|
-
│ black ┆ b"\x00\x00\x00" ┆ 000000 │
|
|
251
|
-
│ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
|
|
252
|
-
│ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
|
|
253
|
-
└────────┴─────────────────┴─────────┘
|
|
254
|
-
"""
|
|
255
|
-
if encoding == "hex":
|
|
256
|
-
return wrap_expr(self._pyexpr.bin_hex_encode())
|
|
257
|
-
elif encoding == "base64":
|
|
258
|
-
return wrap_expr(self._pyexpr.bin_base64_encode())
|
|
259
|
-
else:
|
|
260
|
-
msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
|
|
261
|
-
raise ValueError(msg)
|
|
262
|
-
|
|
263
|
-
def size(self, unit: SizeUnit = "b") -> Expr:
|
|
264
|
-
r"""
|
|
265
|
-
Get the size of binary values in the given unit.
|
|
266
|
-
|
|
267
|
-
Parameters
|
|
268
|
-
----------
|
|
269
|
-
unit : {'b', 'kb', 'mb', 'gb', 'tb'}
|
|
270
|
-
Scale the returned size to the given unit.
|
|
271
|
-
|
|
272
|
-
Returns
|
|
273
|
-
-------
|
|
274
|
-
Expr
|
|
275
|
-
Expression of data type :class:`UInt32` or `Float64`.
|
|
276
|
-
|
|
277
|
-
Examples
|
|
278
|
-
--------
|
|
279
|
-
>>> from os import urandom
|
|
280
|
-
>>> df = pl.DataFrame({"data": [urandom(n) for n in (512, 256, 1024)]})
|
|
281
|
-
>>> df.with_columns( # doctest: +IGNORE_RESULT
|
|
282
|
-
... n_bytes=pl.col("data").bin.size(),
|
|
283
|
-
... n_kilobytes=pl.col("data").bin.size("kb"),
|
|
284
|
-
... )
|
|
285
|
-
shape: (4, 3)
|
|
286
|
-
┌─────────────────────────────────┬─────────┬─────────────┐
|
|
287
|
-
│ data ┆ n_bytes ┆ n_kilobytes │
|
|
288
|
-
│ --- ┆ --- ┆ --- │
|
|
289
|
-
│ binary ┆ u32 ┆ f64 │
|
|
290
|
-
╞═════════════════════════════════╪═════════╪═════════════╡
|
|
291
|
-
│ b"y?~B\x83\xf4V\x07\xd3\xfb\xb… ┆ 512 ┆ 0.5 │
|
|
292
|
-
│ b"\xee$4@f\xc14\x07\x8e\x88\x1… ┆ 256 ┆ 0.25 │
|
|
293
|
-
│ b"\x80\xbd\xb9nEq;2\x99$\xf9\x… ┆ 1024 ┆ 1.0 │
|
|
294
|
-
└─────────────────────────────────┴─────────┴─────────────┘
|
|
295
|
-
"""
|
|
296
|
-
sz = wrap_expr(self._pyexpr.bin_size_bytes())
|
|
297
|
-
sz = scale_bytes(sz, unit)
|
|
298
|
-
return sz
|
|
299
|
-
|
|
300
|
-
def reinterpret(
|
|
301
|
-
self, *, dtype: PolarsDataType | DataTypeExpr, endianness: Endianness = "little"
|
|
302
|
-
) -> Expr:
|
|
303
|
-
r"""
|
|
304
|
-
Interpret bytes as another type.
|
|
305
|
-
|
|
306
|
-
Supported types are numerical or temporal dtypes, or an ``Array`` of
|
|
307
|
-
these dtypes.
|
|
308
|
-
|
|
309
|
-
Parameters
|
|
310
|
-
----------
|
|
311
|
-
dtype : PolarsDataType
|
|
312
|
-
Which type to interpret binary column into.
|
|
313
|
-
endianness : {"big", "little"}, optional
|
|
314
|
-
Which endianness to use when interpreting bytes, by default "little".
|
|
315
|
-
|
|
316
|
-
Returns
|
|
317
|
-
-------
|
|
318
|
-
Expr
|
|
319
|
-
Expression of data type `dtype`.
|
|
320
|
-
Note that rows of the binary array where the length does not match
|
|
321
|
-
the size in bytes of the output array (number of items * byte size
|
|
322
|
-
of item) will become NULL.
|
|
323
|
-
|
|
324
|
-
Examples
|
|
325
|
-
--------
|
|
326
|
-
>>> df = pl.DataFrame({"data": [b"\x05\x00\x00\x00", b"\x10\x00\x01\x00"]})
|
|
327
|
-
>>> df.with_columns( # doctest: +IGNORE_RESULT
|
|
328
|
-
... bin2int=pl.col("data").bin.reinterpret(
|
|
329
|
-
... dtype=pl.Int32, endianness="little"
|
|
330
|
-
... ),
|
|
331
|
-
... )
|
|
332
|
-
shape: (2, 2)
|
|
333
|
-
┌─────────────────────┬─────────┐
|
|
334
|
-
│ data ┆ bin2int │
|
|
335
|
-
│ --- ┆ --- │
|
|
336
|
-
│ binary ┆ i32 │
|
|
337
|
-
╞═════════════════════╪═════════╡
|
|
338
|
-
│ b"\x05\x00\x00\x00" ┆ 5 │
|
|
339
|
-
│ b"\x10\x00\x01\x00" ┆ 65552 │
|
|
340
|
-
└─────────────────────┴─────────┘
|
|
341
|
-
"""
|
|
342
|
-
dtype = parse_into_datatype_expr(dtype)
|
|
343
|
-
|
|
344
|
-
return wrap_expr(
|
|
345
|
-
self._pyexpr.bin_reinterpret(dtype._pydatatype_expr, endianness)
|
|
346
|
-
)
|
polars/expr/categorical.py
DELETED
|
@@ -1,306 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
from polars._utils.various import qualified_type_name
|
|
6
|
-
from polars._utils.wrap import wrap_expr
|
|
7
|
-
|
|
8
|
-
if TYPE_CHECKING:
|
|
9
|
-
from polars import Expr
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class ExprCatNameSpace:
|
|
13
|
-
"""Namespace for categorical related expressions."""
|
|
14
|
-
|
|
15
|
-
_accessor = "cat"
|
|
16
|
-
|
|
17
|
-
def __init__(self, expr: Expr) -> None:
|
|
18
|
-
self._pyexpr = expr._pyexpr
|
|
19
|
-
|
|
20
|
-
def get_categories(self) -> Expr:
|
|
21
|
-
"""
|
|
22
|
-
Get the categories stored in this data type.
|
|
23
|
-
|
|
24
|
-
Examples
|
|
25
|
-
--------
|
|
26
|
-
>>> df = pl.Series(
|
|
27
|
-
... "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
|
|
28
|
-
... ).to_frame()
|
|
29
|
-
>>> df.select(pl.col("cats").cat.get_categories()) # doctest: +SKIP
|
|
30
|
-
shape: (3, 1)
|
|
31
|
-
┌──────┐
|
|
32
|
-
│ cats │
|
|
33
|
-
│ --- │
|
|
34
|
-
│ str │
|
|
35
|
-
╞══════╡
|
|
36
|
-
│ foo │
|
|
37
|
-
│ bar │
|
|
38
|
-
│ ham │
|
|
39
|
-
└──────┘
|
|
40
|
-
"""
|
|
41
|
-
return wrap_expr(self._pyexpr.cat_get_categories())
|
|
42
|
-
|
|
43
|
-
def len_bytes(self) -> Expr:
|
|
44
|
-
"""
|
|
45
|
-
Return the byte-length of the string representation of each value.
|
|
46
|
-
|
|
47
|
-
Returns
|
|
48
|
-
-------
|
|
49
|
-
Expr
|
|
50
|
-
Expression of data type :class:`UInt32`.
|
|
51
|
-
|
|
52
|
-
See Also
|
|
53
|
-
--------
|
|
54
|
-
len_chars
|
|
55
|
-
|
|
56
|
-
Notes
|
|
57
|
-
-----
|
|
58
|
-
When working with non-ASCII text, the length in bytes is not the same as the
|
|
59
|
-
length in characters. You may want to use :func:`len_chars` instead.
|
|
60
|
-
Note that :func:`len_bytes` is much more performant (_O(1)_) than
|
|
61
|
-
:func:`len_chars` (_O(n)_).
|
|
62
|
-
|
|
63
|
-
Examples
|
|
64
|
-
--------
|
|
65
|
-
>>> df = pl.DataFrame(
|
|
66
|
-
... {"a": pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)}
|
|
67
|
-
... )
|
|
68
|
-
>>> df.with_columns(
|
|
69
|
-
... pl.col("a").cat.len_bytes().alias("n_bytes"),
|
|
70
|
-
... pl.col("a").cat.len_chars().alias("n_chars"),
|
|
71
|
-
... )
|
|
72
|
-
shape: (4, 3)
|
|
73
|
-
┌──────┬─────────┬─────────┐
|
|
74
|
-
│ a ┆ n_bytes ┆ n_chars │
|
|
75
|
-
│ --- ┆ --- ┆ --- │
|
|
76
|
-
│ cat ┆ u32 ┆ u32 │
|
|
77
|
-
╞══════╪═════════╪═════════╡
|
|
78
|
-
│ Café ┆ 5 ┆ 4 │
|
|
79
|
-
│ 345 ┆ 3 ┆ 3 │
|
|
80
|
-
│ 東京 ┆ 6 ┆ 2 │
|
|
81
|
-
│ null ┆ null ┆ null │
|
|
82
|
-
└──────┴─────────┴─────────┘
|
|
83
|
-
"""
|
|
84
|
-
return wrap_expr(self._pyexpr.cat_len_bytes())
|
|
85
|
-
|
|
86
|
-
def len_chars(self) -> Expr:
|
|
87
|
-
"""
|
|
88
|
-
Return the number of characters of the string representation of each value.
|
|
89
|
-
|
|
90
|
-
Returns
|
|
91
|
-
-------
|
|
92
|
-
Expr
|
|
93
|
-
Expression of data type :class:`UInt32`.
|
|
94
|
-
|
|
95
|
-
See Also
|
|
96
|
-
--------
|
|
97
|
-
len_bytes
|
|
98
|
-
|
|
99
|
-
Notes
|
|
100
|
-
-----
|
|
101
|
-
When working with ASCII text, use :func:`len_bytes` instead to achieve
|
|
102
|
-
equivalent output with much better performance:
|
|
103
|
-
:func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).
|
|
104
|
-
|
|
105
|
-
A character is defined as a `Unicode scalar value`_. A single character is
|
|
106
|
-
represented by a single byte when working with ASCII text, and a maximum of
|
|
107
|
-
4 bytes otherwise.
|
|
108
|
-
|
|
109
|
-
.. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value
|
|
110
|
-
|
|
111
|
-
Examples
|
|
112
|
-
--------
|
|
113
|
-
>>> df = pl.DataFrame(
|
|
114
|
-
... {"a": pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)}
|
|
115
|
-
... )
|
|
116
|
-
>>> df.with_columns(
|
|
117
|
-
... pl.col("a").cat.len_chars().alias("n_chars"),
|
|
118
|
-
... pl.col("a").cat.len_bytes().alias("n_bytes"),
|
|
119
|
-
... )
|
|
120
|
-
shape: (4, 3)
|
|
121
|
-
┌──────┬─────────┬─────────┐
|
|
122
|
-
│ a ┆ n_chars ┆ n_bytes │
|
|
123
|
-
│ --- ┆ --- ┆ --- │
|
|
124
|
-
│ cat ┆ u32 ┆ u32 │
|
|
125
|
-
╞══════╪═════════╪═════════╡
|
|
126
|
-
│ Café ┆ 4 ┆ 5 │
|
|
127
|
-
│ 345 ┆ 3 ┆ 3 │
|
|
128
|
-
│ 東京 ┆ 2 ┆ 6 │
|
|
129
|
-
│ null ┆ null ┆ null │
|
|
130
|
-
└──────┴─────────┴─────────┘
|
|
131
|
-
"""
|
|
132
|
-
return wrap_expr(self._pyexpr.cat_len_chars())
|
|
133
|
-
|
|
134
|
-
def starts_with(self, prefix: str) -> Expr:
|
|
135
|
-
"""
|
|
136
|
-
Check if string representations of values start with a substring.
|
|
137
|
-
|
|
138
|
-
Parameters
|
|
139
|
-
----------
|
|
140
|
-
prefix
|
|
141
|
-
Prefix substring.
|
|
142
|
-
|
|
143
|
-
See Also
|
|
144
|
-
--------
|
|
145
|
-
contains : Check if string repr contains a substring that matches a pattern.
|
|
146
|
-
ends_with : Check if string repr end with a substring.
|
|
147
|
-
|
|
148
|
-
Notes
|
|
149
|
-
-----
|
|
150
|
-
Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
|
|
151
|
-
a literal string value.
|
|
152
|
-
|
|
153
|
-
Examples
|
|
154
|
-
--------
|
|
155
|
-
>>> df = pl.DataFrame(
|
|
156
|
-
... {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
|
|
157
|
-
... )
|
|
158
|
-
>>> df.with_columns(
|
|
159
|
-
... pl.col("fruits").cat.starts_with("app").alias("has_prefix"),
|
|
160
|
-
... )
|
|
161
|
-
shape: (3, 2)
|
|
162
|
-
┌────────┬────────────┐
|
|
163
|
-
│ fruits ┆ has_prefix │
|
|
164
|
-
│ --- ┆ --- │
|
|
165
|
-
│ cat ┆ bool │
|
|
166
|
-
╞════════╪════════════╡
|
|
167
|
-
│ apple ┆ true │
|
|
168
|
-
│ mango ┆ false │
|
|
169
|
-
│ null ┆ null │
|
|
170
|
-
└────────┴────────────┘
|
|
171
|
-
|
|
172
|
-
Using `starts_with` as a filter condition:
|
|
173
|
-
|
|
174
|
-
>>> df.filter(pl.col("fruits").cat.starts_with("app"))
|
|
175
|
-
shape: (1, 1)
|
|
176
|
-
┌────────┐
|
|
177
|
-
│ fruits │
|
|
178
|
-
│ --- │
|
|
179
|
-
│ cat │
|
|
180
|
-
╞════════╡
|
|
181
|
-
│ apple │
|
|
182
|
-
└────────┘
|
|
183
|
-
"""
|
|
184
|
-
if not isinstance(prefix, str):
|
|
185
|
-
msg = f"'prefix' must be a string; found {qualified_type_name(prefix)!r}"
|
|
186
|
-
raise TypeError(msg)
|
|
187
|
-
return wrap_expr(self._pyexpr.cat_starts_with(prefix))
|
|
188
|
-
|
|
189
|
-
def ends_with(self, suffix: str) -> Expr:
|
|
190
|
-
"""
|
|
191
|
-
Check if string representations of values end with a substring.
|
|
192
|
-
|
|
193
|
-
Parameters
|
|
194
|
-
----------
|
|
195
|
-
suffix
|
|
196
|
-
Suffix substring.
|
|
197
|
-
|
|
198
|
-
See Also
|
|
199
|
-
--------
|
|
200
|
-
contains : Check if string reprs contains a substring that matches a pattern.
|
|
201
|
-
starts_with : Check if string reprs start with a substring.
|
|
202
|
-
|
|
203
|
-
Notes
|
|
204
|
-
-----
|
|
205
|
-
Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
|
|
206
|
-
literal string value.
|
|
207
|
-
|
|
208
|
-
Examples
|
|
209
|
-
--------
|
|
210
|
-
>>> df = pl.DataFrame(
|
|
211
|
-
... {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
|
|
212
|
-
... )
|
|
213
|
-
>>> df.with_columns(pl.col("fruits").cat.ends_with("go").alias("has_suffix"))
|
|
214
|
-
shape: (3, 2)
|
|
215
|
-
┌────────┬────────────┐
|
|
216
|
-
│ fruits ┆ has_suffix │
|
|
217
|
-
│ --- ┆ --- │
|
|
218
|
-
│ cat ┆ bool │
|
|
219
|
-
╞════════╪════════════╡
|
|
220
|
-
│ apple ┆ false │
|
|
221
|
-
│ mango ┆ true │
|
|
222
|
-
│ null ┆ null │
|
|
223
|
-
└────────┴────────────┘
|
|
224
|
-
|
|
225
|
-
Using `ends_with` as a filter condition:
|
|
226
|
-
|
|
227
|
-
>>> df.filter(pl.col("fruits").cat.ends_with("go"))
|
|
228
|
-
shape: (1, 1)
|
|
229
|
-
┌────────┐
|
|
230
|
-
│ fruits │
|
|
231
|
-
│ --- │
|
|
232
|
-
│ cat │
|
|
233
|
-
╞════════╡
|
|
234
|
-
│ mango │
|
|
235
|
-
└────────┘
|
|
236
|
-
"""
|
|
237
|
-
if not isinstance(suffix, str):
|
|
238
|
-
msg = f"'suffix' must be a string; found {qualified_type_name(suffix)!r}"
|
|
239
|
-
raise TypeError(msg)
|
|
240
|
-
return wrap_expr(self._pyexpr.cat_ends_with(suffix))
|
|
241
|
-
|
|
242
|
-
def slice(self, offset: int, length: int | None = None) -> Expr:
|
|
243
|
-
"""
|
|
244
|
-
Extract a substring from the string representation of each value.
|
|
245
|
-
|
|
246
|
-
Parameters
|
|
247
|
-
----------
|
|
248
|
-
offset
|
|
249
|
-
Start index. Negative indexing is supported.
|
|
250
|
-
length
|
|
251
|
-
Length of the slice. If set to `None` (default), the slice is taken to the
|
|
252
|
-
end of the string.
|
|
253
|
-
|
|
254
|
-
Returns
|
|
255
|
-
-------
|
|
256
|
-
Expr
|
|
257
|
-
Expression of data type :class:`String`.
|
|
258
|
-
|
|
259
|
-
Notes
|
|
260
|
-
-----
|
|
261
|
-
Both the `offset` and `length` inputs are defined in terms of the number
|
|
262
|
-
of characters in the (UTF8) string. A character is defined as a
|
|
263
|
-
`Unicode scalar value`_. A single character is represented by a single byte
|
|
264
|
-
when working with ASCII text, and a maximum of 4 bytes otherwise.
|
|
265
|
-
|
|
266
|
-
.. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value
|
|
267
|
-
|
|
268
|
-
Examples
|
|
269
|
-
--------
|
|
270
|
-
>>> df = pl.DataFrame(
|
|
271
|
-
... {
|
|
272
|
-
... "s": pl.Series(
|
|
273
|
-
... ["pear", None, "papaya", "dragonfruit"],
|
|
274
|
-
... dtype=pl.Categorical,
|
|
275
|
-
... )
|
|
276
|
-
... }
|
|
277
|
-
... )
|
|
278
|
-
>>> df.with_columns(pl.col("s").cat.slice(-3).alias("slice"))
|
|
279
|
-
shape: (4, 2)
|
|
280
|
-
┌─────────────┬───────┐
|
|
281
|
-
│ s ┆ slice │
|
|
282
|
-
│ --- ┆ --- │
|
|
283
|
-
│ cat ┆ str │
|
|
284
|
-
╞═════════════╪═══════╡
|
|
285
|
-
│ pear ┆ ear │
|
|
286
|
-
│ null ┆ null │
|
|
287
|
-
│ papaya ┆ aya │
|
|
288
|
-
│ dragonfruit ┆ uit │
|
|
289
|
-
└─────────────┴───────┘
|
|
290
|
-
|
|
291
|
-
Using the optional `length` parameter
|
|
292
|
-
|
|
293
|
-
>>> df.with_columns(pl.col("s").cat.slice(4, length=3).alias("slice"))
|
|
294
|
-
shape: (4, 2)
|
|
295
|
-
┌─────────────┬───────┐
|
|
296
|
-
│ s ┆ slice │
|
|
297
|
-
│ --- ┆ --- │
|
|
298
|
-
│ cat ┆ str │
|
|
299
|
-
╞═════════════╪═══════╡
|
|
300
|
-
│ pear ┆ │
|
|
301
|
-
│ null ┆ null │
|
|
302
|
-
│ papaya ┆ ya │
|
|
303
|
-
│ dragonfruit ┆ onf │
|
|
304
|
-
└─────────────┴───────┘
|
|
305
|
-
"""
|
|
306
|
-
return wrap_expr(self._pyexpr.cat_slice(offset, length))
|