polars-runtime-compat 1.34.0b3__cp39-abi3-win_amd64.whl → 1.34.0b4__cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
  3. polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
polars/expr/binary.py DELETED
@@ -1,346 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- from polars._utils.parse import parse_into_expression
6
- from polars._utils.various import scale_bytes
7
- from polars._utils.wrap import wrap_expr
8
- from polars.datatypes import parse_into_datatype_expr
9
-
10
- if TYPE_CHECKING:
11
- from polars import DataTypeExpr, Expr
12
- from polars._typing import (
13
- Endianness,
14
- IntoExpr,
15
- PolarsDataType,
16
- SizeUnit,
17
- TransferEncoding,
18
- )
19
-
20
-
21
- class ExprBinaryNameSpace:
22
- """Namespace for bin related expressions."""
23
-
24
- _accessor = "bin"
25
-
26
- def __init__(self, expr: Expr) -> None:
27
- self._pyexpr = expr._pyexpr
28
-
29
- def contains(self, literal: IntoExpr) -> Expr:
30
- r"""
31
- Check if binaries in Series contain a binary substring.
32
-
33
- Parameters
34
- ----------
35
- literal
36
- The binary substring to look for
37
-
38
- Returns
39
- -------
40
- Expr
41
- Expression of data type :class:`Boolean`.
42
-
43
- See Also
44
- --------
45
- starts_with : Check if the binary substring exists at the start
46
- ends_with : Check if the binary substring exists at the end
47
-
48
- Examples
49
- --------
50
- >>> colors = pl.DataFrame(
51
- ... {
52
- ... "name": ["black", "yellow", "blue"],
53
- ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
54
- ... "lit": [b"\x00", b"\xff\x00", b"\xff\xff"],
55
- ... }
56
- ... )
57
- >>> colors.select(
58
- ... "name",
59
- ... pl.col("code").bin.contains(b"\xff").alias("contains_with_lit"),
60
- ... pl.col("code").bin.contains(pl.col("lit")).alias("contains_with_expr"),
61
- ... )
62
- shape: (3, 3)
63
- ┌────────┬───────────────────┬────────────────────┐
64
- │ name ┆ contains_with_lit ┆ contains_with_expr │
65
- │ --- ┆ --- ┆ --- │
66
- │ str ┆ bool ┆ bool │
67
- ╞════════╪═══════════════════╪════════════════════╡
68
- │ black ┆ false ┆ true │
69
- │ yellow ┆ true ┆ true │
70
- │ blue ┆ true ┆ false │
71
- └────────┴───────────────────┴────────────────────┘
72
- """
73
- literal_pyexpr = parse_into_expression(literal, str_as_lit=True)
74
- return wrap_expr(self._pyexpr.bin_contains(literal_pyexpr))
75
-
76
- def ends_with(self, suffix: IntoExpr) -> Expr:
77
- r"""
78
- Check if string values end with a binary substring.
79
-
80
- Parameters
81
- ----------
82
- suffix
83
- Suffix substring.
84
-
85
- Returns
86
- -------
87
- Expr
88
- Expression of data type :class:`Boolean`.
89
-
90
- See Also
91
- --------
92
- starts_with : Check if the binary substring exists at the start
93
- contains : Check if the binary substring exists anywhere
94
-
95
- Examples
96
- --------
97
- >>> colors = pl.DataFrame(
98
- ... {
99
- ... "name": ["black", "yellow", "blue"],
100
- ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
101
- ... "suffix": [b"\x00", b"\xff\x00", b"\x00\x00"],
102
- ... }
103
- ... )
104
- >>> colors.select(
105
- ... "name",
106
- ... pl.col("code").bin.ends_with(b"\xff").alias("ends_with_lit"),
107
- ... pl.col("code").bin.ends_with(pl.col("suffix")).alias("ends_with_expr"),
108
- ... )
109
- shape: (3, 3)
110
- ┌────────┬───────────────┬────────────────┐
111
- │ name ┆ ends_with_lit ┆ ends_with_expr │
112
- │ --- ┆ --- ┆ --- │
113
- │ str ┆ bool ┆ bool │
114
- ╞════════╪═══════════════╪════════════════╡
115
- │ black ┆ false ┆ true │
116
- │ yellow ┆ false ┆ true │
117
- │ blue ┆ true ┆ false │
118
- └────────┴───────────────┴────────────────┘
119
- """
120
- suffix_pyexpr = parse_into_expression(suffix, str_as_lit=True)
121
- return wrap_expr(self._pyexpr.bin_ends_with(suffix_pyexpr))
122
-
123
- def starts_with(self, prefix: IntoExpr) -> Expr:
124
- r"""
125
- Check if values start with a binary substring.
126
-
127
- Parameters
128
- ----------
129
- prefix
130
- Prefix substring.
131
-
132
- Returns
133
- -------
134
- Expr
135
- Expression of data type :class:`Boolean`.
136
-
137
- See Also
138
- --------
139
- ends_with : Check if the binary substring exists at the end
140
- contains : Check if the binary substring exists anywhere
141
-
142
- Examples
143
- --------
144
- >>> colors = pl.DataFrame(
145
- ... {
146
- ... "name": ["black", "yellow", "blue"],
147
- ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
148
- ... "prefix": [b"\x00", b"\xff\x00", b"\x00\x00"],
149
- ... }
150
- ... )
151
- >>> colors.select(
152
- ... "name",
153
- ... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_lit"),
154
- ... pl.col("code")
155
- ... .bin.starts_with(pl.col("prefix"))
156
- ... .alias("starts_with_expr"),
157
- ... )
158
- shape: (3, 3)
159
- ┌────────┬─────────────────┬──────────────────┐
160
- │ name ┆ starts_with_lit ┆ starts_with_expr │
161
- │ --- ┆ --- ┆ --- │
162
- │ str ┆ bool ┆ bool │
163
- ╞════════╪═════════════════╪══════════════════╡
164
- │ black ┆ false ┆ true │
165
- │ yellow ┆ true ┆ false │
166
- │ blue ┆ false ┆ true │
167
- └────────┴─────────────────┴──────────────────┘
168
- """
169
- prefix_pyexpr = parse_into_expression(prefix, str_as_lit=True)
170
- return wrap_expr(self._pyexpr.bin_starts_with(prefix_pyexpr))
171
-
172
- def decode(self, encoding: TransferEncoding, *, strict: bool = True) -> Expr:
173
- r"""
174
- Decode values using the provided encoding.
175
-
176
- Parameters
177
- ----------
178
- encoding : {'hex', 'base64'}
179
- The encoding to use.
180
- strict
181
- Raise an error if the underlying value cannot be decoded,
182
- otherwise mask out with a null value.
183
-
184
- Returns
185
- -------
186
- Expr
187
- Expression of data type :class:`Binary`.
188
-
189
- Examples
190
- --------
191
- >>> colors = pl.DataFrame(
192
- ... {
193
- ... "name": ["black", "yellow", "blue"],
194
- ... "encoded": [b"000000", b"ffff00", b"0000ff"],
195
- ... }
196
- ... )
197
- >>> colors.with_columns(
198
- ... pl.col("encoded").bin.decode("hex").alias("code"),
199
- ... )
200
- shape: (3, 3)
201
- ┌────────┬───────────┬─────────────────┐
202
- │ name ┆ encoded ┆ code │
203
- │ --- ┆ --- ┆ --- │
204
- │ str ┆ binary ┆ binary │
205
- ╞════════╪═══════════╪═════════════════╡
206
- │ black ┆ b"000000" ┆ b"\x00\x00\x00" │
207
- │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
208
- │ blue ┆ b"0000ff" ┆ b"\x00\x00\xff" │
209
- └────────┴───────────┴─────────────────┘
210
- """
211
- if encoding == "hex":
212
- return wrap_expr(self._pyexpr.bin_hex_decode(strict))
213
- elif encoding == "base64":
214
- return wrap_expr(self._pyexpr.bin_base64_decode(strict))
215
- else:
216
- msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
217
- raise ValueError(msg)
218
-
219
- def encode(self, encoding: TransferEncoding) -> Expr:
220
- r"""
221
- Encode a value using the provided encoding.
222
-
223
- Parameters
224
- ----------
225
- encoding : {'hex', 'base64'}
226
- The encoding to use.
227
-
228
- Returns
229
- -------
230
- Expr
231
- Expression of data type :class:`Binary`.
232
-
233
- Examples
234
- --------
235
- >>> colors = pl.DataFrame(
236
- ... {
237
- ... "color": ["black", "yellow", "blue"],
238
- ... "code": [b"\x00\x00\x00", b"\xff\xff\x00", b"\x00\x00\xff"],
239
- ... }
240
- ... )
241
- >>> colors.with_columns(
242
- ... pl.col("code").bin.encode("hex").alias("encoded"),
243
- ... )
244
- shape: (3, 3)
245
- ┌────────┬─────────────────┬─────────┐
246
- │ color ┆ code ┆ encoded │
247
- │ --- ┆ --- ┆ --- │
248
- │ str ┆ binary ┆ str │
249
- ╞════════╪═════════════════╪═════════╡
250
- │ black ┆ b"\x00\x00\x00" ┆ 000000 │
251
- │ yellow ┆ b"\xff\xff\x00" ┆ ffff00 │
252
- │ blue ┆ b"\x00\x00\xff" ┆ 0000ff │
253
- └────────┴─────────────────┴─────────┘
254
- """
255
- if encoding == "hex":
256
- return wrap_expr(self._pyexpr.bin_hex_encode())
257
- elif encoding == "base64":
258
- return wrap_expr(self._pyexpr.bin_base64_encode())
259
- else:
260
- msg = f"`encoding` must be one of {{'hex', 'base64'}}, got {encoding!r}"
261
- raise ValueError(msg)
262
-
263
- def size(self, unit: SizeUnit = "b") -> Expr:
264
- r"""
265
- Get the size of binary values in the given unit.
266
-
267
- Parameters
268
- ----------
269
- unit : {'b', 'kb', 'mb', 'gb', 'tb'}
270
- Scale the returned size to the given unit.
271
-
272
- Returns
273
- -------
274
- Expr
275
- Expression of data type :class:`UInt32` or `Float64`.
276
-
277
- Examples
278
- --------
279
- >>> from os import urandom
280
- >>> df = pl.DataFrame({"data": [urandom(n) for n in (512, 256, 1024)]})
281
- >>> df.with_columns( # doctest: +IGNORE_RESULT
282
- ... n_bytes=pl.col("data").bin.size(),
283
- ... n_kilobytes=pl.col("data").bin.size("kb"),
284
- ... )
285
- shape: (4, 3)
286
- ┌─────────────────────────────────┬─────────┬─────────────┐
287
- │ data ┆ n_bytes ┆ n_kilobytes │
288
- │ --- ┆ --- ┆ --- │
289
- │ binary ┆ u32 ┆ f64 │
290
- ╞═════════════════════════════════╪═════════╪═════════════╡
291
- │ b"y?~B\x83\xf4V\x07\xd3\xfb\xb… ┆ 512 ┆ 0.5 │
292
- │ b"\xee$4@f\xc14\x07\x8e\x88\x1… ┆ 256 ┆ 0.25 │
293
- │ b"\x80\xbd\xb9nEq;2\x99$\xf9\x… ┆ 1024 ┆ 1.0 │
294
- └─────────────────────────────────┴─────────┴─────────────┘
295
- """
296
- sz = wrap_expr(self._pyexpr.bin_size_bytes())
297
- sz = scale_bytes(sz, unit)
298
- return sz
299
-
300
- def reinterpret(
301
- self, *, dtype: PolarsDataType | DataTypeExpr, endianness: Endianness = "little"
302
- ) -> Expr:
303
- r"""
304
- Interpret bytes as another type.
305
-
306
- Supported types are numerical or temporal dtypes, or an ``Array`` of
307
- these dtypes.
308
-
309
- Parameters
310
- ----------
311
- dtype : PolarsDataType
312
- Which type to interpret binary column into.
313
- endianness : {"big", "little"}, optional
314
- Which endianness to use when interpreting bytes, by default "little".
315
-
316
- Returns
317
- -------
318
- Expr
319
- Expression of data type `dtype`.
320
- Note that rows of the binary array where the length does not match
321
- the size in bytes of the output array (number of items * byte size
322
- of item) will become NULL.
323
-
324
- Examples
325
- --------
326
- >>> df = pl.DataFrame({"data": [b"\x05\x00\x00\x00", b"\x10\x00\x01\x00"]})
327
- >>> df.with_columns( # doctest: +IGNORE_RESULT
328
- ... bin2int=pl.col("data").bin.reinterpret(
329
- ... dtype=pl.Int32, endianness="little"
330
- ... ),
331
- ... )
332
- shape: (2, 2)
333
- ┌─────────────────────┬─────────┐
334
- │ data ┆ bin2int │
335
- │ --- ┆ --- │
336
- │ binary ┆ i32 │
337
- ╞═════════════════════╪═════════╡
338
- │ b"\x05\x00\x00\x00" ┆ 5 │
339
- │ b"\x10\x00\x01\x00" ┆ 65552 │
340
- └─────────────────────┴─────────┘
341
- """
342
- dtype = parse_into_datatype_expr(dtype)
343
-
344
- return wrap_expr(
345
- self._pyexpr.bin_reinterpret(dtype._pydatatype_expr, endianness)
346
- )
polars/expr/categorical.py DELETED
@@ -1,306 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- from polars._utils.various import qualified_type_name
6
- from polars._utils.wrap import wrap_expr
7
-
8
- if TYPE_CHECKING:
9
- from polars import Expr
10
-
11
-
12
- class ExprCatNameSpace:
13
- """Namespace for categorical related expressions."""
14
-
15
- _accessor = "cat"
16
-
17
- def __init__(self, expr: Expr) -> None:
18
- self._pyexpr = expr._pyexpr
19
-
20
- def get_categories(self) -> Expr:
21
- """
22
- Get the categories stored in this data type.
23
-
24
- Examples
25
- --------
26
- >>> df = pl.Series(
27
- ... "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
28
- ... ).to_frame()
29
- >>> df.select(pl.col("cats").cat.get_categories()) # doctest: +SKIP
30
- shape: (3, 1)
31
- ┌──────┐
32
- │ cats │
33
- │ --- │
34
- │ str │
35
- ╞══════╡
36
- │ foo │
37
- │ bar │
38
- │ ham │
39
- └──────┘
40
- """
41
- return wrap_expr(self._pyexpr.cat_get_categories())
42
-
43
- def len_bytes(self) -> Expr:
44
- """
45
- Return the byte-length of the string representation of each value.
46
-
47
- Returns
48
- -------
49
- Expr
50
- Expression of data type :class:`UInt32`.
51
-
52
- See Also
53
- --------
54
- len_chars
55
-
56
- Notes
57
- -----
58
- When working with non-ASCII text, the length in bytes is not the same as the
59
- length in characters. You may want to use :func:`len_chars` instead.
60
- Note that :func:`len_bytes` is much more performant (_O(1)_) than
61
- :func:`len_chars` (_O(n)_).
62
-
63
- Examples
64
- --------
65
- >>> df = pl.DataFrame(
66
- ... {"a": pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)}
67
- ... )
68
- >>> df.with_columns(
69
- ... pl.col("a").cat.len_bytes().alias("n_bytes"),
70
- ... pl.col("a").cat.len_chars().alias("n_chars"),
71
- ... )
72
- shape: (4, 3)
73
- ┌──────┬─────────┬─────────┐
74
- │ a ┆ n_bytes ┆ n_chars │
75
- │ --- ┆ --- ┆ --- │
76
- │ cat ┆ u32 ┆ u32 │
77
- ╞══════╪═════════╪═════════╡
78
- │ Café ┆ 5 ┆ 4 │
79
- │ 345 ┆ 3 ┆ 3 │
80
- │ 東京 ┆ 6 ┆ 2 │
81
- │ null ┆ null ┆ null │
82
- └──────┴─────────┴─────────┘
83
- """
84
- return wrap_expr(self._pyexpr.cat_len_bytes())
85
-
86
- def len_chars(self) -> Expr:
87
- """
88
- Return the number of characters of the string representation of each value.
89
-
90
- Returns
91
- -------
92
- Expr
93
- Expression of data type :class:`UInt32`.
94
-
95
- See Also
96
- --------
97
- len_bytes
98
-
99
- Notes
100
- -----
101
- When working with ASCII text, use :func:`len_bytes` instead to achieve
102
- equivalent output with much better performance:
103
- :func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).
104
-
105
- A character is defined as a `Unicode scalar value`_. A single character is
106
- represented by a single byte when working with ASCII text, and a maximum of
107
- 4 bytes otherwise.
108
-
109
- .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value
110
-
111
- Examples
112
- --------
113
- >>> df = pl.DataFrame(
114
- ... {"a": pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)}
115
- ... )
116
- >>> df.with_columns(
117
- ... pl.col("a").cat.len_chars().alias("n_chars"),
118
- ... pl.col("a").cat.len_bytes().alias("n_bytes"),
119
- ... )
120
- shape: (4, 3)
121
- ┌──────┬─────────┬─────────┐
122
- │ a ┆ n_chars ┆ n_bytes │
123
- │ --- ┆ --- ┆ --- │
124
- │ cat ┆ u32 ┆ u32 │
125
- ╞══════╪═════════╪═════════╡
126
- │ Café ┆ 4 ┆ 5 │
127
- │ 345 ┆ 3 ┆ 3 │
128
- │ 東京 ┆ 2 ┆ 6 │
129
- │ null ┆ null ┆ null │
130
- └──────┴─────────┴─────────┘
131
- """
132
- return wrap_expr(self._pyexpr.cat_len_chars())
133
-
134
- def starts_with(self, prefix: str) -> Expr:
135
- """
136
- Check if string representations of values start with a substring.
137
-
138
- Parameters
139
- ----------
140
- prefix
141
- Prefix substring.
142
-
143
- See Also
144
- --------
145
- contains : Check if string repr contains a substring that matches a pattern.
146
- ends_with : Check if string repr end with a substring.
147
-
148
- Notes
149
- -----
150
- Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
151
- a literal string value.
152
-
153
- Examples
154
- --------
155
- >>> df = pl.DataFrame(
156
- ... {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
157
- ... )
158
- >>> df.with_columns(
159
- ... pl.col("fruits").cat.starts_with("app").alias("has_prefix"),
160
- ... )
161
- shape: (3, 2)
162
- ┌────────┬────────────┐
163
- │ fruits ┆ has_prefix │
164
- │ --- ┆ --- │
165
- │ cat ┆ bool │
166
- ╞════════╪════════════╡
167
- │ apple ┆ true │
168
- │ mango ┆ false │
169
- │ null ┆ null │
170
- └────────┴────────────┘
171
-
172
- Using `starts_with` as a filter condition:
173
-
174
- >>> df.filter(pl.col("fruits").cat.starts_with("app"))
175
- shape: (1, 1)
176
- ┌────────┐
177
- │ fruits │
178
- │ --- │
179
- │ cat │
180
- ╞════════╡
181
- │ apple │
182
- └────────┘
183
- """
184
- if not isinstance(prefix, str):
185
- msg = f"'prefix' must be a string; found {qualified_type_name(prefix)!r}"
186
- raise TypeError(msg)
187
- return wrap_expr(self._pyexpr.cat_starts_with(prefix))
188
-
189
- def ends_with(self, suffix: str) -> Expr:
190
- """
191
- Check if string representations of values end with a substring.
192
-
193
- Parameters
194
- ----------
195
- suffix
196
- Suffix substring.
197
-
198
- See Also
199
- --------
200
- contains : Check if string reprs contains a substring that matches a pattern.
201
- starts_with : Check if string reprs start with a substring.
202
-
203
- Notes
204
- -----
205
- Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
206
- literal string value.
207
-
208
- Examples
209
- --------
210
- >>> df = pl.DataFrame(
211
- ... {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
212
- ... )
213
- >>> df.with_columns(pl.col("fruits").cat.ends_with("go").alias("has_suffix"))
214
- shape: (3, 2)
215
- ┌────────┬────────────┐
216
- │ fruits ┆ has_suffix │
217
- │ --- ┆ --- │
218
- │ cat ┆ bool │
219
- ╞════════╪════════════╡
220
- │ apple ┆ false │
221
- │ mango ┆ true │
222
- │ null ┆ null │
223
- └────────┴────────────┘
224
-
225
- Using `ends_with` as a filter condition:
226
-
227
- >>> df.filter(pl.col("fruits").cat.ends_with("go"))
228
- shape: (1, 1)
229
- ┌────────┐
230
- │ fruits │
231
- │ --- │
232
- │ cat │
233
- ╞════════╡
234
- │ mango │
235
- └────────┘
236
- """
237
- if not isinstance(suffix, str):
238
- msg = f"'suffix' must be a string; found {qualified_type_name(suffix)!r}"
239
- raise TypeError(msg)
240
- return wrap_expr(self._pyexpr.cat_ends_with(suffix))
241
-
242
- def slice(self, offset: int, length: int | None = None) -> Expr:
243
- """
244
- Extract a substring from the string representation of each value.
245
-
246
- Parameters
247
- ----------
248
- offset
249
- Start index. Negative indexing is supported.
250
- length
251
- Length of the slice. If set to `None` (default), the slice is taken to the
252
- end of the string.
253
-
254
- Returns
255
- -------
256
- Expr
257
- Expression of data type :class:`String`.
258
-
259
- Notes
260
- -----
261
- Both the `offset` and `length` inputs are defined in terms of the number
262
- of characters in the (UTF8) string. A character is defined as a
263
- `Unicode scalar value`_. A single character is represented by a single byte
264
- when working with ASCII text, and a maximum of 4 bytes otherwise.
265
-
266
- .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value
267
-
268
- Examples
269
- --------
270
- >>> df = pl.DataFrame(
271
- ... {
272
- ... "s": pl.Series(
273
- ... ["pear", None, "papaya", "dragonfruit"],
274
- ... dtype=pl.Categorical,
275
- ... )
276
- ... }
277
- ... )
278
- >>> df.with_columns(pl.col("s").cat.slice(-3).alias("slice"))
279
- shape: (4, 2)
280
- ┌─────────────┬───────┐
281
- │ s ┆ slice │
282
- │ --- ┆ --- │
283
- │ cat ┆ str │
284
- ╞═════════════╪═══════╡
285
- │ pear ┆ ear │
286
- │ null ┆ null │
287
- │ papaya ┆ aya │
288
- │ dragonfruit ┆ uit │
289
- └─────────────┴───────┘
290
-
291
- Using the optional `length` parameter
292
-
293
- >>> df.with_columns(pl.col("s").cat.slice(4, length=3).alias("slice"))
294
- shape: (4, 2)
295
- ┌─────────────┬───────┐
296
- │ s ┆ slice │
297
- │ --- ┆ --- │
298
- │ cat ┆ str │
299
- ╞═════════════╪═══════╡
300
- │ pear ┆ │
301
- │ null ┆ null │
302
- │ papaya ┆ ya │
303
- │ dragonfruit ┆ onf │
304
- └─────────────┴───────┘
305
- """
306
- return wrap_expr(self._pyexpr.cat_slice(offset, length))