polars-runtime-compat 1.34.0b3__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b5__cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of polars-runtime-compat might be problematic.

Files changed (204)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
  202. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  204. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/selectors.py DELETED
@@ -1,3117 +0,0 @@
- from __future__ import annotations
-
- import builtins
- import contextlib
- import datetime as pydatetime
- import sys
- from collections.abc import Collection, Mapping, Sequence
- from decimal import Decimal as PyDecimal
- from functools import reduce
- from operator import or_
- from typing import (
- TYPE_CHECKING,
- Any,
- Literal,
- NoReturn,
- overload,
- )
-
- import polars.datatypes.classes as pldt
- from polars import functions as F
- from polars._utils.parse.expr import _parse_inputs_as_iterable
- from polars._utils.unstable import unstable
- from polars._utils.various import is_column, re_escape
- from polars.datatypes import (
- Binary,
- Boolean,
- Categorical,
- Date,
- String,
- Time,
- is_polars_dtype,
- )
- from polars.expr import Expr
-
- with contextlib.suppress(ImportError): # Module not available when building docs
- from polars._plr import PyExpr, PySelector
-
- if sys.version_info >= (3, 10):
- from types import NoneType
- else: # pragma: no cover
- # Define equivalent for older Python versions
- NoneType = type(None)
-
- if TYPE_CHECKING:
- from collections.abc import Iterable
-
- from polars import DataFrame, LazyFrame
- from polars._typing import PolarsDataType, PythonDataType, TimeUnit
-
- __all__ = [
- # class
- "Selector",
- # functions
- "all",
- "alpha",
- "alphanumeric",
- "array",
- "binary",
- "boolean",
- "by_dtype",
- "by_index",
- "by_name",
- "categorical",
- "contains",
- "date",
- "datetime",
- "decimal",
- "digit",
- "duration",
- "ends_with",
- "enum",
- "exclude",
- "expand_selector",
- "first",
- "float",
- "integer",
- "is_selector",
- "last",
- "list",
- "matches",
- "nested",
- "numeric",
- "signed_integer",
- "starts_with",
- "string",
- "struct",
- "temporal",
- "time",
- "unsigned_integer",
- ]
-
-
- @overload
- def is_selector(obj: Selector) -> Literal[True]: ...
-
-
- @overload
- def is_selector(obj: Any) -> Literal[False]: ...
-
-
- def is_selector(obj: Any) -> bool:
- """
- Indicate whether the given object/expression is a selector.
-
- Examples
- --------
- >>> from polars.selectors import is_selector
- >>> import polars.selectors as cs
- >>> is_selector(pl.col("colx"))
- False
- >>> is_selector(cs.first() | cs.last())
- True
- """
- return isinstance(obj, Selector)
-
-
- # TODO: Don't use this as it collects a schema (can be very expensive for LazyFrame).
- # This should move to IR conversion / Rust.
- def expand_selector(
- target: DataFrame | LazyFrame | Mapping[str, PolarsDataType],
- selector: Selector | Expr,
- *,
- strict: bool = True,
- ) -> tuple[str, ...]:
- """
- Expand selector to column names, with respect to a specific frame or target schema.
-
- .. versionadded:: 0.20.30
- The `strict` parameter was added.
-
- Parameters
- ----------
- target
- A Polars DataFrame, LazyFrame or Schema.
- selector
- An arbitrary polars selector (or compound selector).
- strict
- Setting False additionally allows for a broader range of column selection
- expressions (such as bare columns or use of `.exclude()`) to be expanded,
- not just the dedicated selectors.
-
- Examples
- --------
- >>> import polars.selectors as cs
- >>> df = pl.DataFrame(
- ... {
- ... "colx": ["a", "b", "c"],
- ... "coly": [123, 456, 789],
- ... "colz": [2.0, 5.5, 8.0],
- ... }
- ... )
-
- Expand selector with respect to an existing `DataFrame`:
-
- >>> cs.expand_selector(df, cs.numeric())
- ('coly', 'colz')
- >>> cs.expand_selector(df, cs.first() | cs.last())
- ('colx', 'colz')
-
- This also works with `LazyFrame`:
-
- >>> cs.expand_selector(df.lazy(), ~(cs.first() | cs.last()))
- ('coly',)
-
- Expand selector with respect to a standalone `Schema` dict:
-
- >>> schema = {
- ... "id": pl.Int64,
- ... "desc": pl.String,
- ... "count": pl.UInt32,
- ... "value": pl.Float64,
- ... }
- >>> cs.expand_selector(schema, cs.string() | cs.float())
- ('desc', 'value')
-
- Allow for non-strict selection expressions (such as those
- including use of an `.exclude()` constraint) to be expanded:
-
- >>> cs.expand_selector(schema, cs.numeric().exclude("id"), strict=False)
- ('count', 'value')
- """
- if isinstance(target, Mapping):
- from polars.dataframe import DataFrame
-
- target = DataFrame(schema=target)
-
- if not (
- is_selector(selector)
- if strict
- else selector.meta.is_column_selection(allow_aliasing=False)
- ):
- msg = f"expected a selector; found {selector!r} instead."
- raise TypeError(msg)
-
- return tuple(target.select(selector).collect_schema())
-
-
- # TODO: Don't use this as it collects a schema (can be very expensive for LazyFrame).
- # This should move to IR conversion / Rust.
- def _expand_selectors(frame: DataFrame | LazyFrame, *items: Any) -> builtins.list[Any]:
- """
- Internal function that expands any selectors to column names in the given input.
-
- Non-selector values are left as-is.
-
- Examples
- --------
- >>> from polars.selectors import _expand_selectors
- >>> import polars.selectors as cs
- >>> df = pl.DataFrame(
- ... {
- ... "colw": ["a", "b"],
- ... "colx": ["x", "y"],
- ... "coly": [123, 456],
- ... "colz": [2.0, 5.5],
- ... }
- ... )
- >>> _expand_selectors(df, ["colx", cs.numeric()])
- ['colx', 'coly', 'colz']
- >>> _expand_selectors(df, cs.string(), cs.float())
- ['colw', 'colx', 'colz']
- """
- items_iter = _parse_inputs_as_iterable(items)
-
- expanded: builtins.list[Any] = []
- for item in items_iter:
- if is_selector(item):
- selector_cols = expand_selector(frame, item)
- expanded.extend(selector_cols)
- else:
- expanded.append(item)
- return expanded
-
-
- def _expand_selector_dicts(
- df: DataFrame,
- d: Mapping[Any, Any] | None,
- *,
- expand_keys: bool,
- expand_values: bool,
- tuple_keys: bool = False,
- ) -> dict[str, Any]:
- """Expand dict key/value selectors into their underlying column names."""
- expanded = {}
- for key, value in (d or {}).items():
- if expand_values and is_selector(value):
- expanded[key] = expand_selector(df, selector=value)
- value = expanded[key]
- if expand_keys and is_selector(key):
- cols = expand_selector(df, selector=key)
- if tuple_keys:
- expanded[cols] = value
- else:
- expanded.update(dict.fromkeys(cols, value))
- else:
- expanded[key] = value
- return expanded
-
-
- def _combine_as_selector(
- items: (
- str
- | Expr
- | PolarsDataType
- | Selector
- | Collection[str | Expr | PolarsDataType | Selector]
- ),
- *more_items: str | Expr | PolarsDataType | Selector,
- ) -> Selector:
- """Create a combined selector from cols, names, dtypes, and/or other selectors."""
- names, regexes, dtypes = [], [], []
- selectors: builtins.list[Selector] = []
- for item in (
- *(
- items
- if isinstance(items, Collection) and not isinstance(items, str)
- else [items]
- ),
- *more_items,
- ):
- if is_selector(item):
- selectors.append(item)
- elif is_polars_dtype(item):
- dtypes.append(item)
- elif isinstance(item, str):
- if item.startswith("^") and item.endswith("$"):
- regexes.append(item)
- else:
- names.append(item)
- elif is_column(item):
- names.append(item.meta.output_name()) # type: ignore[union-attr]
- else:
- msg = f"expected one or more `str`, `DataType` or selector; found {item!r} instead."
- raise TypeError(msg)
-
- selected = []
- if names:
- selected.append(by_name(*names, require_all=False))
- if dtypes:
- selected.append(by_dtype(*dtypes))
- if regexes:
- selected.append(
- matches(
- "|".join(f"({rx})" for rx in regexes)
- if len(regexes) > 1
- else regexes[0]
- )
- )
- if selectors:
- selected.extend(selectors)
-
- return reduce(or_, selected)
-
-
- class Selector(Expr):
- """Base column selector expression/proxy."""
-
- # NOTE: This `= None` is needed to generate the docs with sphinx_accessor.
- _pyselector: PySelector = None # type: ignore[assignment]
-
- @classmethod
- def _from_pyselector(cls, pyselector: PySelector) -> Selector:
- slf = cls()
- slf._pyselector = pyselector
- slf._pyexpr = PyExpr.new_selector(pyselector)
- return slf
-
- def __getstate__(self) -> bytes:
- return self._pyexpr.__getstate__()
-
- def __setstate__(self, state: bytes) -> None:
- self._pyexpr = F.lit(0)._pyexpr # Initialize with a dummy
- self._pyexpr.__setstate__(state)
- self._pyselector = self.meta.as_selector()._pyselector
-
- def __repr__(self) -> str:
- return str(Expr._from_pyexpr(self._pyexpr))
-
- def __hash__(self) -> int:
- # note: this is a suitable hash for selectors (but NOT expressions in general),
- # as the repr is guaranteed to be unique across all selector/param permutations
- return self._pyselector.hash()
-
- @classmethod
- def _by_dtype(
- cls, dtypes: builtins.list[PythonDataType | PolarsDataType]
- ) -> Selector:
- selectors = []
- concrete_dtypes = []
- for dt in dtypes:
- if is_polars_dtype(dt):
- if dt is pldt.Datetime:
- selectors += [datetime()]
- elif isinstance(dt, pldt.Datetime) and dt.time_zone == "*":
- selectors += [datetime(time_unit=dt.time_unit, time_zone="*")]
- elif dt is pldt.Duration:
- selectors += [duration()]
- elif dt is pldt.Categorical:
- selectors += [categorical()]
- elif dt is pldt.Enum:
- selectors += [enum()]
- elif dt is pldt.List:
- selectors += [list()]
- elif dt is pldt.Array:
- selectors += [array()]
- elif dt is pldt.Struct:
- selectors += [struct()]
- elif dt is pldt.Decimal:
- selectors += [decimal()]
- else:
- concrete_dtypes += [dt]
- elif isinstance(dt, type):
- if dt is int:
- selectors += [integer()]
- elif dt is builtins.float:
- selectors += [float()]
- elif dt is bool:
- selectors += [boolean()]
- elif dt is str:
- concrete_dtypes += [pldt.String()]
- elif dt is bytes:
- concrete_dtypes += [pldt.Binary()]
- elif dt is object:
- selectors += [object()]
- elif dt is NoneType:
- concrete_dtypes += [pldt.Null()]
- elif dt is pydatetime.time:
- concrete_dtypes += [pldt.Time()]
- elif dt is pydatetime.datetime:
- selectors += [datetime()]
- elif dt is pydatetime.timedelta:
- selectors += [duration()]
- elif dt is pydatetime.date:
- selectors += [date()]
- elif dt is PyDecimal:
- selectors += [decimal()]
- elif dt is builtins.list or dt is tuple:
- selectors += [list()]
- else:
- input_type = (
- input
- if type(input) is type
- else f"of type {type(input).__name__!r}"
- )
- input_detail = "" if type(input) is type else f" (given: {input!r})"
- msg = f"cannot parse input {input_type} into Polars selector{input_detail}"
- raise TypeError(msg) from None
- else:
- input_type = (
- input
- if type(input) is type
- else f"of type {type(input).__name__!r}"
- )
- input_detail = "" if type(input) is type else f" (given: {input!r})"
- msg = f"cannot parse input {input_type} into Polars selector{input_detail}"
- raise TypeError(msg) from None
-
- dtype_selector = cls._from_pyselector(PySelector.by_dtype(concrete_dtypes))
-
- if len(selectors) == 0:
- return dtype_selector
-
- selector = selectors[0]
- for s in selectors[1:]:
- selector = selector | s
- if len(concrete_dtypes) == 0:
- return selector
- else:
- return dtype_selector | selector
-
- @classmethod
- def _by_name(cls, names: builtins.list[str], *, strict: bool) -> Selector:
- return cls._from_pyselector(PySelector.by_name(names, strict))
-
- def __invert__(cls) -> Selector:
- """Invert the selector."""
- return all() - cls
-
- def __add__(self, other: Any) -> Expr:
- if is_selector(other):
- return self.as_expr().__add__(other.as_expr())
- else:
- return self.as_expr().__add__(other)
-
- def __radd__(self, other: Any) -> Expr:
- if is_selector(other):
- msg = "unsupported operand type(s) for op: ('Selector' + 'Selector')"
- raise TypeError(msg)
- else:
- return self.as_expr().__radd__(other)
-
- @overload
- def __and__(self, other: Selector) -> Selector: ...
-
- @overload
- def __and__(self, other: Any) -> Expr: ...
-
- def __and__(self, other: Any) -> Selector | Expr:
- if is_column(other): # @2.0: remove
- colname = other.meta.output_name()
- other = by_name(colname)
- if is_selector(other):
- return Selector._from_pyselector(
- PySelector.intersect(self._pyselector, other._pyselector)
- )
- else:
- return self.as_expr().__and__(other)
-
- def __rand__(self, other: Any) -> Expr:
- return self.as_expr().__rand__(other)
-
- @overload
- def __or__(self, other: Selector) -> Selector: ...
-
- @overload
- def __or__(self, other: Any) -> Expr: ...
-
- def __or__(self, other: Any) -> Selector | Expr:
- if is_column(other): # @2.0: remove
- other = by_name(other.meta.output_name())
- if is_selector(other):
- return Selector._from_pyselector(
- PySelector.union(self._pyselector, other._pyselector)
- )
- else:
- return self.as_expr().__or__(other)
-
- def __ror__(self, other: Any) -> Expr:
- if is_column(other):
- other = by_name(other.meta.output_name())
- return self.as_expr().__ror__(other)
-
- @overload
- def __sub__(self, other: Selector) -> Selector: ...
-
- @overload
- def __sub__(self, other: Any) -> Expr: ...
-
- def __sub__(self, other: Any) -> Selector | Expr:
- if is_selector(other):
- return Selector._from_pyselector(
- PySelector.difference(self._pyselector, other._pyselector)
- )
- else:
- return self.as_expr().__sub__(other)
-
- def __rsub__(self, other: Any) -> NoReturn:
- msg = "unsupported operand type(s) for op: ('Expr' - 'Selector')"
- raise TypeError(msg)
-
- @overload
- def __xor__(self, other: Selector) -> Selector: ...
-
- @overload
- def __xor__(self, other: Any) -> Expr: ...
-
- def __xor__(self, other: Any) -> Selector | Expr:
- if is_column(other): # @2.0: remove
- other = by_name(other.meta.output_name())
- if is_selector(other):
- return Selector._from_pyselector(
- PySelector.exclusive_or(self._pyselector, other._pyselector)
- )
- else:
- return self.as_expr().__xor__(other)
-
- def __rxor__(self, other: Any) -> Expr:
- if is_column(other): # @2.0: remove
- other = by_name(other.meta.output_name())
- return self.as_expr().__rxor__(other)
-
- def exclude(
- self,
- columns: str | PolarsDataType | Collection[str] | Collection[PolarsDataType],
- *more_columns: str | PolarsDataType,
- ) -> Selector:
- """
- Exclude columns from a multi-column expression.
-
- Only works after a wildcard or regex column selection, and you cannot provide
- both string column names *and* dtypes (you may prefer to use selectors instead).
-
- Parameters
- ----------
- columns
- The name or datatype of the column(s) to exclude. Accepts regular expression
- input. Regular expressions should start with `^` and end with `$`.
- *more_columns
- Additional names or datatypes of columns to exclude, specified as positional
- arguments.
- """
- exclude_cols: builtins.list[str] = []
- exclude_dtypes: builtins.list[PolarsDataType] = []
- for item in (
- *(
- columns
- if isinstance(columns, Collection) and not isinstance(columns, str)
- else [columns]
- ),
- *more_columns,
- ):
- if isinstance(item, str):
- exclude_cols.append(item)
- elif is_polars_dtype(item):
- exclude_dtypes.append(item)
- else:
- msg = (
- "invalid input for `exclude`"
- f"\n\nExpected one or more `str` or `DataType`; found {item!r} instead."
- )
- raise TypeError(msg)
-
- if exclude_cols and exclude_dtypes:
- msg = "cannot exclude by both column name and dtype; use a selector instead"
- raise TypeError(msg)
- elif exclude_dtypes:
- return self - by_dtype(exclude_dtypes)
- else:
- return self - by_name(exclude_cols, require_all=False)
-
- def as_expr(self) -> Expr:
- """
- Materialize the `selector` as a normal expression.
-
- This ensures that the operators `|`, `&`, `~` and `-`
- are applied on the data and not on the selector sets.
-
- Examples
- --------
- >>> import polars.selectors as cs
- >>> df = pl.DataFrame(
- ... {
- ... "colx": ["aa", "bb", "cc"],
- ... "coly": [True, False, True],
- ... "colz": [1, 2, 3],
- ... }
- ... )
-
- Inverting the boolean selector will choose the non-boolean columns:
-
- >>> df.select(~cs.boolean())
- shape: (3, 2)
- ┌──────┬──────┐
- │ colx ┆ colz │
- │ --- ┆ --- │
- │ str ┆ i64 │
- ╞══════╪══════╡
- │ aa ┆ 1 │
- │ bb ┆ 2 │
- │ cc ┆ 3 │
- └──────┴──────┘
-
- To invert the *values* in the selected boolean columns, we need to
- materialize the selector as a standard expression instead:
-
- >>> df.select(~cs.boolean().as_expr())
- shape: (3, 1)
- ┌───────┐
- │ coly │
- │ --- │
- │ bool │
- ╞═══════╡
- │ false │
- │ true │
- │ false │
- └───────┘
- """
- return Expr._from_pyexpr(self._pyexpr)
-
-
631
- def _re_string(string: str | Collection[str], *, escape: bool = True) -> str:
632
- """Return escaped regex, potentially representing multiple string fragments."""
633
- if isinstance(string, str):
634
- rx = re_escape(string) if escape else string
635
- else:
636
- strings: builtins.list[str] = []
637
- for st in string:
638
- if isinstance(st, Collection) and not isinstance(st, str): # type: ignore[redundant-expr]
639
- strings.extend(st)
640
- else:
641
- strings.append(st)
642
- rx = "|".join((re_escape(x) if escape else x) for x in strings)
643
- return f"({rx})"
644
-
645
-
646
- def empty() -> Selector:
647
- """
648
- Select no columns.
649
-
650
- This is useful for composition with other selectors.
651
-
652
- See Also
653
- --------
654
- all : Select all columns in the current scope.
655
-
656
- Examples
657
- --------
658
- >>> import polars.selectors as cs
659
- >>> pl.DataFrame({"a": 1, "b": 2}).select(cs.empty())
660
- shape: (0, 0)
661
- ┌┐
662
- ╞╡
663
- └┘
664
- """
665
- return Selector._from_pyselector(PySelector.empty())
666
-
667
-
668
- def all() -> Selector:
669
- """
670
- Select all columns.
671
-
672
- See Also
673
- --------
674
- first : Select the first column in the current scope.
675
- last : Select the last column in the current scope.
676
-
677
- Examples
678
- --------
679
- >>> from datetime import date
680
- >>> import polars.selectors as cs
681
- >>> df = pl.DataFrame(
682
- ... {
683
- ... "dt": [date(1999, 12, 31), date(2024, 1, 1)],
684
- ... "value": [1_234_500, 5_000_555],
685
- ... },
686
- ... schema_overrides={"value": pl.Int32},
687
- ... )
688
-
689
- Select all columns, casting them to string:
690
-
691
- >>> df.select(cs.all().cast(pl.String))
692
- shape: (2, 2)
693
- ┌────────────┬─────────┐
694
- │ dt ┆ value │
695
- │ --- ┆ --- │
696
- │ str ┆ str │
697
- ╞════════════╪═════════╡
698
- │ 1999-12-31 ┆ 1234500 │
699
- │ 2024-01-01 ┆ 5000555 │
700
- └────────────┴─────────┘
701
-
702
- Select all columns *except* for those matching the given dtypes:
703
-
704
- >>> df.select(cs.all() - cs.numeric())
705
- shape: (2, 1)
706
- ┌────────────┐
707
- │ dt │
708
- │ --- │
709
- │ date │
710
- ╞════════════╡
711
- │ 1999-12-31 │
712
- │ 2024-01-01 │
713
- └────────────┘
714
- """
715
- return Selector._from_pyselector(PySelector.all())
716
-
717
-
718
- def alpha(ascii_only: bool = False, *, ignore_spaces: bool = False) -> Selector: # noqa: FBT001
719
- r"""
720
- Select all columns with alphabetic names (eg: only letters).
721
-
722
- Parameters
723
- ----------
724
- ascii_only
725
- Indicate whether to consider only ASCII alphabetic characters, or the full
726
- Unicode range of valid letters (accented, idiographic, etc).
727
- ignore_spaces
728
- Indicate whether to ignore the presence of spaces in column names; if so,
729
- only the other (non-space) characters are considered.
730
-
731
- Notes
732
- -----
733
- Matching column names cannot contain *any* non-alphabetic characters. Note
734
- that the definition of "alphabetic" consists of all valid Unicode alphabetic
735
- characters (`\p{Alphabetic}`) by default; this can be changed by setting
736
- `ascii_only=True`.
737
-
738
- Examples
739
- --------
740
- >>> import polars as pl
741
- >>> import polars.selectors as cs
742
- >>> df = pl.DataFrame(
743
- ... {
744
- ... "no1": [100, 200, 300],
745
- ... "café": ["espresso", "latte", "mocha"],
746
- ... "t or f": [True, False, None],
747
- ... "hmm": ["aaa", "bbb", "ccc"],
748
- ... "都市": ["東京", "大阪", "京都"],
749
- ... }
750
- ... )
751
-
752
- Select columns with alphabetic names; note that accented
753
- characters and kanji are recognised as alphabetic here:
754
-
755
- >>> df.select(cs.alpha())
756
- shape: (3, 3)
757
- ┌──────────┬─────┬──────┐
758
- │ café ┆ hmm ┆ 都市 │
759
- │ --- ┆ --- ┆ --- │
760
- │ str ┆ str ┆ str │
761
- ╞══════════╪═════╪══════╡
762
- │ espresso ┆ aaa ┆ 東京 │
763
- │ latte ┆ bbb ┆ 大阪 │
764
- │ mocha ┆ ccc ┆ 京都 │
765
- └──────────┴─────┴──────┘
766
-
767
- Constrain the definition of "alphabetic" to ASCII characters only:
768
-
769
- >>> df.select(cs.alpha(ascii_only=True))
770
- shape: (3, 1)
771
- ┌─────┐
772
- │ hmm │
773
- │ --- │
774
- │ str │
775
- ╞═════╡
776
- │ aaa │
777
- │ bbb │
778
- │ ccc │
779
- └─────┘
780
-
781
- >>> df.select(cs.alpha(ascii_only=True, ignore_spaces=True))
782
- shape: (3, 2)
783
- ┌────────┬─────┐
784
- │ t or f ┆ hmm │
785
- │ --- ┆ --- │
786
- │ bool ┆ str │
787
- ╞════════╪═════╡
788
- │ true ┆ aaa │
789
- │ false ┆ bbb │
790
- │ null ┆ ccc │
791
- └────────┴─────┘
792
-
793
- Select all columns *except* for those with alphabetic names:
794
-
795
- >>> df.select(~cs.alpha())
796
- shape: (3, 2)
797
- ┌─────┬────────┐
798
- │ no1 ┆ t or f │
799
- │ --- ┆ --- │
800
- │ i64 ┆ bool │
801
- ╞═════╪════════╡
802
- │ 100 ┆ true │
803
- │ 200 ┆ false │
804
- │ 300 ┆ null │
805
- └─────┴────────┘
806
-
807
- >>> df.select(~cs.alpha(ignore_spaces=True))
808
- shape: (3, 1)
809
- ┌─────┐
810
- │ no1 │
811
- │ --- │
812
- │ i64 │
813
- ╞═════╡
814
- │ 100 │
815
- │ 200 │
816
- │ 300 │
817
- └─────┘
818
- """
819
- # note that we need to supply a pattern compatible with the *rust* regex crate
820
- re_alpha = r"a-zA-Z" if ascii_only else r"\p{Alphabetic}"
821
- re_space = " " if ignore_spaces else ""
822
- return Selector._from_pyselector(PySelector.matches(f"^[{re_alpha}{re_space}]+$"))
823
-
824
-
825
- def alphanumeric(
826
- ascii_only: bool = False, # noqa: FBT001
827
- *,
828
- ignore_spaces: bool = False,
829
- ) -> Selector:
830
- r"""
831
- Select all columns with alphanumeric names (eg: only letters and the digits 0-9).
832
-
833
- Parameters
834
- ----------
835
- ascii_only
836
- Indicate whether to consider only ASCII alphabetic characters, or the full
837
- Unicode range of valid letters (accented, idiographic, etc).
838
- ignore_spaces
839
- Indicate whether to ignore the presence of spaces in column names; if so,
840
- only the other (non-space) characters are considered.
841
-
842
- Notes
843
- -----
844
- Matching column names cannot contain *any* non-alphabetic or integer characters.
845
- Note that the definition of "alphabetic" consists of all valid Unicode alphabetic
846
- characters (`\p{Alphabetic}`) and digit characters (`\d`) by default; this
847
- can be changed by setting `ascii_only=True`.
848
-
849
- Examples
850
- --------
851
- >>> import polars as pl
852
- >>> import polars.selectors as cs
853
- >>> df = pl.DataFrame(
854
- ... {
855
- ... "1st_col": [100, 200, 300],
856
- ... "flagged": [True, False, True],
857
- ... "00prefix": ["01:aa", "02:bb", "03:cc"],
858
- ... "last col": ["x", "y", "z"],
859
- ... }
860
- ... )
861
-
862
- Select columns with alphanumeric names:
863
-
864
- >>> df.select(cs.alphanumeric())
865
- shape: (3, 2)
866
- ┌─────────┬──────────┐
867
- │ flagged ┆ 00prefix │
868
- │ --- ┆ --- │
869
- │ bool ┆ str │
870
- ╞═════════╪══════════╡
871
- │ true ┆ 01:aa │
872
- │ false ┆ 02:bb │
873
- │ true ┆ 03:cc │
874
- └─────────┴──────────┘
875
-
876
- >>> df.select(cs.alphanumeric(ignore_spaces=True))
877
- shape: (3, 3)
878
- ┌─────────┬──────────┬──────────┐
879
- │ flagged ┆ 00prefix ┆ last col │
880
- │ --- ┆ --- ┆ --- │
881
- │ bool ┆ str ┆ str │
882
- ╞═════════╪══════════╪══════════╡
883
- │ true ┆ 01:aa ┆ x │
884
- │ false ┆ 02:bb ┆ y │
885
- │ true ┆ 03:cc ┆ z │
886
- └─────────┴──────────┴──────────┘
887
-
888
- Select all columns *except* for those with alphanumeric names:
889
-
890
- >>> df.select(~cs.alphanumeric())
891
- shape: (3, 2)
892
- ┌─────────┬──────────┐
893
- │ 1st_col ┆ last col │
894
- │ --- ┆ --- │
895
- │ i64 ┆ str │
896
- ╞═════════╪══════════╡
897
- │ 100 ┆ x │
898
- │ 200 ┆ y │
899
- │ 300 ┆ z │
900
- └─────────┴──────────┘
901
-
902
- >>> df.select(~cs.alphanumeric(ignore_spaces=True))
903
- shape: (3, 1)
904
- ┌─────────┐
905
- │ 1st_col │
906
- │ --- │
907
- │ i64 │
908
- ╞═════════╡
909
- │ 100 │
910
- │ 200 │
911
- │ 300 │
912
- └─────────┘
913
- """
914
- # note that we need to supply patterns compatible with the *rust* regex crate
915
- re_alpha = r"a-zA-Z" if ascii_only else r"\p{Alphabetic}"
916
- re_digit = "0-9" if ascii_only else r"\d"
917
- re_space = " " if ignore_spaces else ""
918
- return Selector._from_pyselector(
919
- PySelector.matches(f"^[{re_alpha}{re_digit}{re_space}]+$")
920
- )
921
-
922
-
923
- def binary() -> Selector:
924
- """
925
- Select all binary columns.
926
-
927
- See Also
928
- --------
929
- by_dtype : Select all columns matching the given dtype(s).
930
- string : Select all string columns (optionally including categoricals).
931
-
932
- Examples
933
- --------
934
- >>> import polars.selectors as cs
935
- >>> df = pl.DataFrame({"a": [b"hello"], "b": ["world"], "c": [b"!"], "d": [":)"]})
936
- >>> df
937
- shape: (1, 4)
938
- ┌──────────┬───────┬────────┬─────┐
939
- │ a ┆ b ┆ c ┆ d │
940
- │ --- ┆ --- ┆ --- ┆ --- │
941
- │ binary ┆ str ┆ binary ┆ str │
942
- ╞══════════╪═══════╪════════╪═════╡
943
- │ b"hello" ┆ world ┆ b"!" ┆ :) │
944
- └──────────┴───────┴────────┴─────┘
945
-
946
- Select binary columns and export as a dict:
947
-
948
- >>> df.select(cs.binary()).to_dict(as_series=False)
949
- {'a': [b'hello'], 'c': [b'!']}
950
-
951
- Select all columns *except* for those that are binary:
952
-
953
- >>> df.select(~cs.binary()).to_dict(as_series=False)
954
- {'b': ['world'], 'd': [':)']}
955
- """
956
- return by_dtype([Binary])
957
-
958
-
959
- def boolean() -> Selector:
960
- """
961
- Select all boolean columns.
962
-
963
- See Also
964
- --------
965
- by_dtype : Select all columns matching the given dtype(s).
966
-
967
- Examples
968
- --------
969
- >>> import polars.selectors as cs
970
- >>> df = pl.DataFrame({"n": range(1, 5)}).with_columns(n_even=pl.col("n") % 2 == 0)
971
- >>> df
972
- shape: (4, 2)
973
- ┌─────┬────────┐
974
- │ n ┆ n_even │
975
- │ --- ┆ --- │
976
- │ i64 ┆ bool │
977
- ╞═════╪════════╡
978
- │ 1 ┆ false │
979
- │ 2 ┆ true │
980
- │ 3 ┆ false │
981
- │ 4 ┆ true │
982
- └─────┴────────┘
983
-
984
- Select and invert boolean columns:
985
-
986
- >>> df.with_columns(is_odd=cs.boolean().not_())
987
- shape: (4, 3)
988
- ┌─────┬────────┬────────┐
989
- │ n ┆ n_even ┆ is_odd │
990
- │ --- ┆ --- ┆ --- │
991
- │ i64 ┆ bool ┆ bool │
992
- ╞═════╪════════╪════════╡
993
- │ 1 ┆ false ┆ true │
994
- │ 2 ┆ true ┆ false │
995
- │ 3 ┆ false ┆ true │
996
- │ 4 ┆ true ┆ false │
997
- └─────┴────────┴────────┘
998
-
999
- Select all columns *except* for those that are boolean:
1000
-
1001
- >>> df.select(~cs.boolean())
1002
- shape: (4, 1)
1003
- ┌─────┐
1004
- │ n │
1005
- │ --- │
1006
- │ i64 │
1007
- ╞═════╡
1008
- │ 1 │
1009
- │ 2 │
1010
- │ 3 │
1011
- │ 4 │
1012
- └─────┘
1013
- """
1014
- return by_dtype([Boolean])
1015
-
1016
-
1017
- def by_dtype(
1018
- *dtypes: (
1019
- PolarsDataType
1020
- | PythonDataType
1021
- | Iterable[PolarsDataType]
1022
- | Iterable[PythonDataType]
1023
- ),
1024
- ) -> Selector:
1025
- """
1026
- Select all columns matching the given dtypes.
1027
-
1028
- See Also
1029
- --------
1030
- by_name : Select all columns matching the given names.
1031
- by_index : Select all columns matching the given indices.
1032
-
1033
- Examples
1034
- --------
1035
- >>> from datetime import date
1036
- >>> import polars.selectors as cs
1037
- >>> df = pl.DataFrame(
1038
- ... {
1039
- ... "dt": [date(1999, 12, 31), date(2024, 1, 1), date(2010, 7, 5)],
1040
- ... "value": [1_234_500, 5_000_555, -4_500_000],
1041
- ... "other": ["foo", "bar", "foo"],
1042
- ... }
1043
- ... )
1044
-
1045
- Select all columns with date or string dtypes:
1046
-
1047
- >>> df.select(cs.by_dtype(pl.Date, pl.String))
1048
- shape: (3, 2)
1049
- ┌────────────┬───────┐
1050
- │ dt ┆ other │
1051
- │ --- ┆ --- │
1052
- │ date ┆ str │
1053
- ╞════════════╪═══════╡
1054
- │ 1999-12-31 ┆ foo │
1055
- │ 2024-01-01 ┆ bar │
1056
- │ 2010-07-05 ┆ foo │
1057
- └────────────┴───────┘
1058
-
1059
- Select all columns that are not of date or string dtype:
1060
-
1061
- >>> df.select(~cs.by_dtype(pl.Date, pl.String))
1062
- shape: (3, 1)
1063
- ┌──────────┐
1064
- │ value │
1065
- │ --- │
1066
- │ i64 │
1067
- ╞══════════╡
1068
- │ 1234500 │
1069
- │ 5000555 │
1070
- │ -4500000 │
1071
- └──────────┘
1072
-
1073
- Group by string columns and sum the numeric columns:
1074
-
1075
- >>> df.group_by(cs.string()).agg(cs.numeric().sum()).sort(by="other")
1076
- shape: (2, 2)
1077
- ┌───────┬──────────┐
1078
- │ other ┆ value │
1079
- │ --- ┆ --- │
1080
- │ str ┆ i64 │
1081
- ╞═══════╪══════════╡
1082
- │ bar ┆ 5000555 │
1083
- │ foo ┆ -3265500 │
1084
- └───────┴──────────┘
1085
- """
1086
- all_dtypes: builtins.list[PolarsDataType | PythonDataType] = []
1087
- for tp in dtypes:
1088
- if is_polars_dtype(tp) or isinstance(tp, type):
1089
- all_dtypes.append(tp)
1090
- elif isinstance(tp, Collection):
1091
- for t in tp:
1092
- if not (is_polars_dtype(t) or isinstance(t, type)):
1093
- msg = f"invalid dtype: {t!r}"
1094
- raise TypeError(msg)
1095
- all_dtypes.append(t)
1096
- else:
1097
- msg = f"invalid dtype: {tp!r}"
1098
- raise TypeError(msg)
1099
-
1100
- return Selector._by_dtype(all_dtypes)
1101
-
1102
-
1103
- def by_index(
1104
- *indices: int | range | Sequence[int | range], require_all: bool = True
1105
- ) -> Selector:
1106
- """
1107
- Select all columns matching the given indices (or range objects).
1108
-
1109
- Parameters
1110
- ----------
1111
- *indices
1112
- One or more column indices (or range objects).
1113
- Negative indexing is supported.
1114
-
1115
- Notes
1116
- -----
1117
- Matching columns are returned in the order in which their indexes
1118
- appear in the selector, not the underlying schema order.
1119
-
1120
- See Also
1121
- --------
1122
- by_dtype : Select all columns matching the given dtypes.
1123
- by_name : Select all columns matching the given names.
1124
-
1125
- Examples
1126
- --------
1127
- >>> import polars.selectors as cs
1128
- >>> df = pl.DataFrame(
1129
- ... {
1130
- ... "key": ["abc"],
1131
- ... **{f"c{i:02}": [0.5 * i] for i in range(100)},
1132
- ... },
1133
- ... )
1134
- >>> print(df)
1135
- shape: (1, 101)
1136
- ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
1137
- │ key ┆ c00 ┆ c01 ┆ c02 ┆ … ┆ c96 ┆ c97 ┆ c98 ┆ c99 │
1138
- │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
1139
- │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
1140
- ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
1141
- │ abc ┆ 0.0 ┆ 0.5 ┆ 1.0 ┆ … ┆ 48.0 ┆ 48.5 ┆ 49.0 ┆ 49.5 │
1142
- └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
1143
-
1144
- Select columns by index ("key" column and the two first/last columns):
1145
-
1146
- >>> df.select(cs.by_index(0, 1, 2, -2, -1))
1147
- shape: (1, 5)
1148
- ┌─────┬─────┬─────┬──────┬──────┐
1149
- │ key ┆ c00 ┆ c01 ┆ c98 ┆ c99 │
1150
- │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1151
- │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
1152
- ╞═════╪═════╪═════╪══════╪══════╡
1153
- │ abc ┆ 0.0 ┆ 0.5 ┆ 49.0 ┆ 49.5 │
1154
- └─────┴─────┴─────┴──────┴──────┘
1155
-
1156
- Select the "key" column and use a `range` object to select various columns.
1157
- Note that you can freely mix and match integer indices and `range` objects:
1158
-
1159
- >>> df.select(cs.by_index(0, range(1, 101, 20)))
1160
- shape: (1, 6)
1161
- ┌─────┬─────┬──────┬──────┬──────┬──────┐
1162
- │ key ┆ c00 ┆ c20 ┆ c40 ┆ c60 ┆ c80 │
1163
- │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1164
- │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
1165
- ╞═════╪═════╪══════╪══════╪══════╪══════╡
1166
- │ abc ┆ 0.0 ┆ 10.0 ┆ 20.0 ┆ 30.0 ┆ 40.0 │
1167
- └─────┴─────┴──────┴──────┴──────┴──────┘
1168
-
1169
- >>> df.select(cs.by_index(0, range(101, 0, -25), require_all=False))
1170
- shape: (1, 5)
1171
- ┌─────┬──────┬──────┬──────┬─────┐
1172
- │ key ┆ c75 ┆ c50 ┆ c25 ┆ c00 │
1173
- │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
1174
- │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
1175
- ╞═════╪══════╪══════╪══════╪═════╡
1176
- │ abc ┆ 37.5 ┆ 25.0 ┆ 12.5 ┆ 0.0 │
1177
- └─────┴──────┴──────┴──────┴─────┘
1178
-
1179
- Select all columns *except* for the even-indexed ones:
1180
-
1181
- >>> df.select(~cs.by_index(range(1, 100, 2)))
1182
- shape: (1, 51)
1183
- ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
1184
- │ key ┆ c01 ┆ c03 ┆ c05 ┆ … ┆ c93 ┆ c95 ┆ c97 ┆ c99 │
1185
- │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
1186
- │ str ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
1187
- ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
1188
- │ abc ┆ 0.5 ┆ 1.5 ┆ 2.5 ┆ … ┆ 46.5 ┆ 47.5 ┆ 48.5 ┆ 49.5 │
1189
- └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘
1190
- """
1191
- all_indices: builtins.list[int] = []
1192
- for idx in indices:
1193
- if isinstance(idx, (range, Sequence)):
1194
- all_indices.extend(idx) # type: ignore[arg-type]
1195
- elif isinstance(idx, int):
1196
- all_indices.append(idx)
1197
- else:
1198
- msg = f"invalid index value: {idx!r}"
1199
- raise TypeError(msg)
1200
-
1201
- return Selector._from_pyselector(PySelector.by_index(all_indices, require_all))
1202
-
1203
-
1204
- def by_name(*names: str | Collection[str], require_all: bool = True) -> Selector:
1205
- """
1206
- Select all columns matching the given names.
1207
-
1208
- .. versionadded:: 0.20.27
1209
- The `require_all` parameter was added.
1210
-
1211
- Parameters
1212
- ----------
1213
- *names
1214
- One or more names of columns to select.
1215
- require_all
1216
- Whether to match *all* names (the default) or *any* of the names.
1217
-
1218
- Notes
1219
- -----
1220
- Matching columns are returned in the order in which they are declared in
1221
- the selector, not the underlying schema order.
1222
-
1223
- See Also
1224
- --------
1225
- by_dtype : Select all columns matching the given dtypes.
1226
- by_index : Select all columns matching the given indices.
1227
-
1228
- Examples
1229
- --------
1230
- >>> import polars.selectors as cs
1231
- >>> df = pl.DataFrame(
1232
- ... {
1233
- ... "foo": ["x", "y"],
1234
- ... "bar": [123, 456],
1235
- ... "baz": [2.0, 5.5],
1236
- ... "zap": [False, True],
1237
- ... }
1238
- ... )
1239
-
1240
- Select columns by name:
1241
-
1242
- >>> df.select(cs.by_name("foo", "bar"))
1243
- shape: (2, 2)
1244
- ┌─────┬─────┐
1245
- │ foo ┆ bar │
1246
- │ --- ┆ --- │
1247
- │ str ┆ i64 │
1248
- ╞═════╪═════╡
1249
- │ x ┆ 123 │
1250
- │ y ┆ 456 │
1251
- └─────┴─────┘
1252
-
1253
- Match *any* of the given columns by name:
1254
-
1255
- >>> df.select(cs.by_name("baz", "moose", "foo", "bear", require_all=False))
1256
- shape: (2, 2)
1257
- ┌─────┬─────┐
1258
- │ baz ┆ foo │
1259
- │ --- ┆ --- │
1260
- │ f64 ┆ str │
1261
- ╞═════╪═════╡
1262
- │ 2.0 ┆ x │
1263
- │ 5.5 ┆ y │
1264
- └─────┴─────┘
1265
-
1266
- Match all columns *except* for those given:
1267
-
1268
- >>> df.select(~cs.by_name("foo", "bar"))
1269
- shape: (2, 2)
1270
- ┌─────┬───────┐
1271
- │ baz ┆ zap │
1272
- │ --- ┆ --- │
1273
- │ f64 ┆ bool │
1274
- ╞═════╪═══════╡
1275
- │ 2.0 ┆ false │
1276
- │ 5.5 ┆ true │
1277
- └─────┴───────┘
1278
- """
1279
- all_names = []
1280
- for nm in names:
1281
- if isinstance(nm, str):
1282
- all_names.append(nm)
1283
- elif isinstance(nm, Collection):
1284
- for n in nm:
1285
- if not isinstance(n, str):
1286
- msg = f"invalid name: {n!r}"
1287
- raise TypeError(msg)
1288
- all_names.append(n)
1289
- else:
1290
- msg = f"invalid name: {nm!r}"
1291
- raise TypeError(msg)
1292
-
1293
- return Selector._by_name(all_names, strict=require_all)
1294
-
1295
-
1296
- @unstable()
1297
- def enum() -> Selector:
1298
- """
1299
- Select all enum columns.
1300
-
1301
- .. warning::
1302
- This functionality is considered **unstable**. It may be changed
1303
- at any point without it being considered a breaking change.
1304
-
1305
- See Also
1306
- --------
1307
- by_dtype : Select all columns matching the given dtype(s).
1308
- categorical : Select all categorical columns.
1309
- string : Select all string columns (optionally including categoricals).
1310
-
1311
- Examples
1312
- --------
1313
- >>> import polars.selectors as cs
1314
- >>> df = pl.DataFrame(
1315
- ... {
1316
- ... "foo": ["xx", "yy"],
1317
- ... "bar": [123, 456],
1318
- ... "baz": [2.0, 5.5],
1319
- ... },
1320
- ... schema_overrides={"foo": pl.Enum(["xx", "yy"])},
1321
- ... )
1322
-
1323
- Select all enum columns:
1324
-
1325
- >>> df.select(cs.enum())
1326
- shape: (2, 1)
1327
- ┌──────┐
1328
- │ foo │
1329
- │ --- │
1330
- │ enum │
1331
- ╞══════╡
1332
- │ xx │
1333
- │ yy │
1334
- └──────┘
1335
-
1336
- Select all columns *except* for those that are enum:
1337
-
1338
- >>> df.select(~cs.enum())
1339
- shape: (2, 2)
1340
- ┌─────┬─────┐
1341
- │ bar ┆ baz │
1342
- │ --- ┆ --- │
1343
- │ i64 ┆ f64 │
1344
- ╞═════╪═════╡
1345
- │ 123 ┆ 2.0 │
1346
- │ 456 ┆ 5.5 │
1347
- └─────┴─────┘
1348
- """
1349
- return Selector._from_pyselector(PySelector.enum_())
1350
-
1351
-
1352
- @unstable()
1353
- def list(inner: None | Selector = None) -> Selector:
1354
- """
1355
- Select all list columns.
1356
-
1357
- .. warning::
1358
- This functionality is considered **unstable**. It may be changed
1359
- at any point without it being considered a breaking change.
1360
-
1361
- See Also
1362
- --------
1363
- by_dtype : Select all columns matching the given dtype(s).
1364
- array : Select all array columns.
1365
- nested : Select all nested columns.
1366
-
1367
- Examples
1368
- --------
1369
- >>> import polars.selectors as cs
1370
- >>> df = pl.DataFrame(
1371
- ... {
1372
- ... "foo": [["xx", "yy"], ["x"]],
1373
- ... "bar": [123, 456],
1374
- ... "baz": [2.0, 5.5],
1375
- ... },
1376
- ... )
1377
-
1378
- Select all list columns:
1379
-
1380
- >>> df.select(cs.list())
1381
- shape: (2, 1)
1382
- ┌──────────────┐
1383
- │ foo │
1384
- │ --- │
1385
- │ list[str] │
1386
- ╞══════════════╡
1387
- │ ["xx", "yy"] │
1388
- │ ["x"] │
1389
- └──────────────┘
1390
-
1391
- Select all columns *except* for those that are list:
1392
-
1393
- >>> df.select(~cs.list())
1394
- shape: (2, 2)
1395
- ┌─────┬─────┐
1396
- │ bar ┆ baz │
1397
- │ --- ┆ --- │
1398
- │ i64 ┆ f64 │
1399
- ╞═════╪═════╡
1400
- │ 123 ┆ 2.0 │
1401
- │ 456 ┆ 5.5 │
1402
- └─────┴─────┘
1403
-
1404
- Select all list columns with a certain matching inner type:
1405
-
1406
- >>> df.select(cs.list(cs.string()))
1407
- shape: (2, 1)
1408
- ┌──────────────┐
1409
- │ foo │
1410
- │ --- │
1411
- │ list[str] │
1412
- ╞══════════════╡
1413
- │ ["xx", "yy"] │
1414
- │ ["x"] │
1415
- └──────────────┘
1416
- >>> df.select(cs.list(cs.integer()))
1417
- shape: (0, 0)
1418
- ┌┐
1419
- ╞╡
1420
- └┘
1421
- """
1422
- inner_s = inner._pyselector if inner is not None else None
1423
- return Selector._from_pyselector(PySelector.list(inner_s))
1424
-
1425
-
1426
- @unstable()
1427
- def array(inner: Selector | None = None, *, width: int | None = None) -> Selector:
1428
- """
1429
- Select all array columns.
1430
-
1431
- .. warning::
1432
- This functionality is considered **unstable**. It may be changed
1433
- at any point without it being considered a breaking change.
1434
-
1435
- See Also
1436
- --------
1437
- by_dtype : Select all columns matching the given dtype(s).
1438
- list : Select all list columns.
1439
- nested : Select all nested columns.
1440
-
1441
- Examples
1442
- --------
1443
- >>> import polars.selectors as cs
1444
- >>> df = pl.DataFrame(
1445
- ... {
1446
- ... "foo": [["xx", "yy"], ["x", "y"]],
1447
- ... "bar": [123, 456],
1448
- ... "baz": [2.0, 5.5],
1449
- ... },
1450
- ... schema_overrides={"foo": pl.Array(pl.String, 2)},
1451
- ... )
1452
-
1453
- Select all array columns:
1454
-
1455
- >>> df.select(cs.array())
1456
- shape: (2, 1)
1457
- ┌───────────────┐
1458
- │ foo │
1459
- │ --- │
1460
- │ array[str, 2] │
1461
- ╞═══════════════╡
1462
- │ ["xx", "yy"] │
1463
- │ ["x", "y"] │
1464
- └───────────────┘
1465
-
1466
- Select all columns *except* for those that are array:
1467
-
1468
- >>> df.select(~cs.array())
1469
- shape: (2, 2)
1470
- ┌─────┬─────┐
1471
- │ bar ┆ baz │
1472
- │ --- ┆ --- │
1473
- │ i64 ┆ f64 │
1474
- ╞═════╪═════╡
1475
- │ 123 ┆ 2.0 │
1476
- │ 456 ┆ 5.5 │
1477
- └─────┴─────┘
1478
-
1479
- Select all array columns with a certain matching inner type:
1480
-
1481
- >>> df.select(cs.array(cs.string()))
1482
- shape: (2, 1)
1483
- ┌───────────────┐
1484
- │ foo │
1485
- │ --- │
1486
- │ array[str, 2] │
1487
- ╞═══════════════╡
1488
- │ ["xx", "yy"] │
1489
- │ ["x", "y"] │
1490
- └───────────────┘
1491
- >>> df.select(cs.array(cs.integer()))
1492
- shape: (0, 0)
1493
- ┌┐
1494
- ╞╡
1495
- └┘
1496
- >>> df.select(cs.array(width=2))
1497
- shape: (2, 1)
1498
- ┌───────────────┐
1499
- │ foo │
1500
- │ --- │
1501
- │ array[str, 2] │
1502
- ╞═══════════════╡
1503
- │ ["xx", "yy"] │
1504
- │ ["x", "y"] │
1505
- └───────────────┘
1506
- >>> df.select(cs.array(width=3))
1507
- shape: (0, 0)
1508
- ┌┐
1509
- ╞╡
1510
- └┘
1511
- """
1512
- inner_s = inner._pyselector if inner is not None else None
1513
- return Selector._from_pyselector(PySelector.array(inner_s, width))
1514
-
1515
-
1516
- @unstable()
1517
- def struct() -> Selector:
1518
- """
1519
- Select all struct columns.
1520
-
1521
- .. warning::
1522
- This functionality is considered **unstable**. It may be changed
1523
- at any point without it being considered a breaking change.
1524
-
1525
- See Also
1526
- --------
1527
- by_dtype : Select all columns matching the given dtype(s).
1528
- list : Select all list columns.
1529
- array : Select all array columns.
1530
- nested : Select all nested columns.
1531
-
1532
- Examples
1533
- --------
1534
- >>> import polars.selectors as cs
1535
- >>> df = pl.DataFrame(
1536
- ... {
1537
- ... "foo": [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
1538
- ... "bar": [123, 456],
1539
- ... "baz": [2.0, 5.5],
1540
- ... },
1541
- ... )
1542
-
1543
- Select all struct columns:
1544
-
1545
- >>> df.select(cs.struct())
1546
- shape: (2, 1)
1547
- ┌────────────┐
1548
- │ foo │
1549
- │ --- │
1550
- │ struct[2] │
1551
- ╞════════════╡
1552
- │ {"xx","z"} │
1553
- │ {"x","y"} │
1554
- └────────────┘
1555
-
1556
- Select all columns *except* for those that are struct:
1557
-
1558
- >>> df.select(~cs.struct())
1559
- shape: (2, 2)
1560
- ┌─────┬─────┐
1561
- │ bar ┆ baz │
1562
- │ --- ┆ --- │
1563
- │ i64 ┆ f64 │
1564
- ╞═════╪═════╡
1565
- │ 123 ┆ 2.0 │
1566
- │ 456 ┆ 5.5 │
1567
- └─────┴─────┘
1568
- """
1569
- return Selector._from_pyselector(PySelector.struct_())
1570
-
1571
-
1572
- @unstable()
1573
- def nested() -> Selector:
1574
- """
1575
- Select all nested columns.
1576
-
1577
- A nested column is a list, array or struct.
1578
-
1579
- .. warning::
1580
- This functionality is considered **unstable**. It may be changed
1581
- at any point without it being considered a breaking change.
1582
-
1583
- See Also
1584
- --------
1585
- by_dtype : Select all columns matching the given dtype(s).
1586
- list : Select all list columns.
1587
- array : Select all array columns.
1588
- struct : Select all struct columns.
1589
-
1590
- Examples
1591
- --------
1592
- >>> import polars.selectors as cs
1593
- >>> df = pl.DataFrame(
1594
- ... {
1595
- ... "foo": [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
1596
- ... "bar": [123, 456],
1597
- ... "baz": [2.0, 5.5],
1598
- ... "wow": [[1, 2], [3]],
1599
- ... },
1600
- ... )
1601
-
1602
- Select all nested columns:
1603
-
1604
- >>> df.select(cs.nested())
1605
- shape: (2, 2)
1606
- ┌────────────┬───────────┐
1607
- │ foo ┆ wow │
1608
- │ --- ┆ --- │
1609
- │ struct[2] ┆ list[i64] │
1610
- ╞════════════╪═══════════╡
1611
- │ {"xx","z"} ┆ [1, 2] │
1612
- │ {"x","y"} ┆ [3] │
1613
- └────────────┴───────────┘
1614
-
1615
- Select all columns *except* for those that are nested:
1616
-
1617
- >>> df.select(~cs.nested())
1618
- shape: (2, 2)
1619
- ┌─────┬─────┐
1620
- │ bar ┆ baz │
1621
- │ --- ┆ --- │
1622
- │ i64 ┆ f64 │
1623
- ╞═════╪═════╡
1624
- │ 123 ┆ 2.0 │
1625
- │ 456 ┆ 5.5 │
1626
- └─────┴─────┘
1627
- """
1628
- return Selector._from_pyselector(PySelector.nested())
1629
-
1630
-
1631
- def categorical() -> Selector:
1632
- """
1633
- Select all categorical columns.
1634
-
1635
- See Also
1636
- --------
1637
- by_dtype : Select all columns matching the given dtype(s).
1638
- string : Select all string columns (optionally including categoricals).
1639
-
1640
- Examples
1641
- --------
1642
- >>> import polars.selectors as cs
1643
- >>> df = pl.DataFrame(
1644
- ... {
1645
- ... "foo": ["xx", "yy"],
1646
- ... "bar": [123, 456],
1647
- ... "baz": [2.0, 5.5],
1648
- ... },
1649
- ... schema_overrides={"foo": pl.Categorical},
1650
- ... )
1651
-
1652
- Select all categorical columns:
1653
-
1654
- >>> df.select(cs.categorical())
1655
- shape: (2, 1)
1656
- ┌─────┐
1657
- │ foo │
1658
- │ --- │
1659
- │ cat │
1660
- ╞═════╡
1661
- │ xx │
1662
- │ yy │
1663
- └─────┘
1664
-
1665
- Select all columns *except* for those that are categorical:
1666
-
1667
- >>> df.select(~cs.categorical())
1668
- shape: (2, 2)
1669
- ┌─────┬─────┐
1670
- │ bar ┆ baz │
1671
- │ --- ┆ --- │
1672
- │ i64 ┆ f64 │
1673
- ╞═════╪═════╡
1674
- │ 123 ┆ 2.0 │
1675
- │ 456 ┆ 5.5 │
1676
- └─────┴─────┘
1677
- """
1678
- return Selector._from_pyselector(PySelector.categorical())
1679
-
1680
-
1681
- def contains(*substring: str) -> Selector:
1682
- """
1683
- Select columns whose names contain the given literal substring(s).
1684
-
1685
- Parameters
1686
- ----------
1687
- substring
1688
- Substring(s) that matching column names should contain.
1689
-
1690
- See Also
1691
- --------
1692
- matches : Select all columns that match the given regex pattern.
1693
- ends_with : Select columns that end with the given substring(s).
1694
- starts_with : Select columns that start with the given substring(s).
1695
-
1696
- Examples
1697
- --------
1698
- >>> import polars.selectors as cs
1699
- >>> df = pl.DataFrame(
1700
- ... {
1701
- ... "foo": ["x", "y"],
1702
- ... "bar": [123, 456],
1703
- ... "baz": [2.0, 5.5],
1704
- ... "zap": [False, True],
1705
- ... }
1706
- ... )
1707
-
1708
- Select columns that contain the substring 'ba':
1709
-
1710
- >>> df.select(cs.contains("ba"))
1711
- shape: (2, 2)
1712
- ┌─────┬─────┐
1713
- │ bar ┆ baz │
1714
- │ --- ┆ --- │
1715
- │ i64 ┆ f64 │
1716
- ╞═════╪═════╡
1717
- │ 123 ┆ 2.0 │
1718
- │ 456 ┆ 5.5 │
1719
- └─────┴─────┘
1720
-
1721
- Select columns that contain the substring 'ba' or the letter 'z':
1722
-
1723
- >>> df.select(cs.contains("ba", "z"))
1724
- shape: (2, 3)
1725
- ┌─────┬─────┬───────┐
1726
- │ bar ┆ baz ┆ zap │
1727
- │ --- ┆ --- ┆ --- │
1728
- │ i64 ┆ f64 ┆ bool │
1729
- ╞═════╪═════╪═══════╡
1730
- │ 123 ┆ 2.0 ┆ false │
1731
- │ 456 ┆ 5.5 ┆ true │
1732
- └─────┴─────┴───────┘
1733
-
1734
- Select all columns *except* for those that contain the substring 'ba':
1735
-
1736
- >>> df.select(~cs.contains("ba"))
1737
- shape: (2, 2)
1738
- ┌─────┬───────┐
1739
- │ foo ┆ zap │
1740
- │ --- ┆ --- │
1741
- │ str ┆ bool │
1742
- ╞═════╪═══════╡
1743
- │ x ┆ false │
1744
- │ y ┆ true │
1745
- └─────┴───────┘
1746
- """
1747
- escaped_substring = _re_string(substring)
1748
- raw_params = f"^.*{escaped_substring}.*$"
1749
-
1750
- return Selector._from_pyselector(PySelector.matches(raw_params))
1751
-
1752
-
1753
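For intuition: the selector above reduces literal-substring matching to a single regex over column names. A minimal standalone sketch of the same idea follows; `contains_pattern` is a hypothetical stand-in for `_re_string` (assumed here to escape each literal and join the alternatives with `|`):

>>> import re
>>> def contains_pattern(*substring: str) -> str:
...     # hypothetical helper: escape the literals, OR them together, anchor both ends
...     escaped = "|".join(re.escape(s) for s in substring)
...     return f"^.*(?:{escaped}).*$"
>>> [name for name in ["foo", "bar", "baz", "zap"] if re.match(contains_pattern("ba", "z"), name)]
['bar', 'baz', 'zap']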
- def date() -> Selector:
1754
- """
1755
- Select all date columns.
1756
-
1757
- See Also
1758
- --------
1759
- datetime : Select all datetime columns, optionally filtering by time unit/zone.
1760
- duration : Select all duration columns, optionally filtering by time unit.
1761
- temporal : Select all temporal columns.
1762
- time : Select all time columns.
1763
-
1764
- Examples
1765
- --------
1766
- >>> from datetime import date, datetime, time
1767
- >>> import polars.selectors as cs
1768
- >>> df = pl.DataFrame(
1769
- ... {
1770
- ... "dtm": [datetime(2001, 5, 7, 10, 25), datetime(2031, 12, 31, 0, 30)],
1771
- ... "dt": [date(1999, 12, 31), date(2024, 8, 9)],
1772
- ... "tm": [time(0, 0, 0), time(23, 59, 59)],
1773
- ... },
1774
- ... )
1775
-
1776
- Select all date columns:
1777
-
1778
- >>> df.select(cs.date())
1779
- shape: (2, 1)
1780
- ┌────────────┐
1781
- │ dt │
1782
- │ --- │
1783
- │ date │
1784
- ╞════════════╡
1785
- │ 1999-12-31 │
1786
- │ 2024-08-09 │
1787
- └────────────┘
1788
-
1789
- Select all columns *except* for those that are dates:
1790
-
1791
- >>> df.select(~cs.date())
1792
- shape: (2, 2)
1793
- ┌─────────────────────┬──────────┐
1794
- │ dtm ┆ tm │
1795
- │ --- ┆ --- │
1796
- │ datetime[μs] ┆ time │
1797
- ╞═════════════════════╪══════════╡
1798
- │ 2001-05-07 10:25:00 ┆ 00:00:00 │
1799
- │ 2031-12-31 00:30:00 ┆ 23:59:59 │
1800
- └─────────────────────┴──────────┘
1801
- """
1802
- return by_dtype([Date])
1803
-
1804
-
1805
- def datetime(
1806
- time_unit: TimeUnit | Collection[TimeUnit] | None = None,
1807
- time_zone: (
1808
- str | pydatetime.timezone | Collection[str | pydatetime.timezone | None] | None
1809
- ) = (
1810
- "*",
1811
- None,
1812
- ),
1813
- ) -> Selector:
1814
- """
1815
- Select all datetime columns, optionally filtering by time unit/zone.
1816
-
1817
- Parameters
1818
- ----------
1819
- time_unit
1820
- One (or more) of the allowed time unit precision strings: "ms", "us", or "ns".
1821
- Omit to select columns with any valid time unit.
1822
- time_zone
1823
- * One or more timezone strings, as defined in zoneinfo (to see valid options
1824
- run `import zoneinfo; zoneinfo.available_timezones()` for a full list).
1825
- * Set `None` to select Datetime columns that do not have a timezone.
1826
- * Set "*" to select Datetime columns that have *any* timezone.
1827
-
1828
- See Also
1829
- --------
1830
- date : Select all date columns.
1831
- duration : Select all duration columns, optionally filtering by time unit.
1832
- temporal : Select all temporal columns.
1833
- time : Select all time columns.
1834
-
1835
- Examples
1836
- --------
1837
- >>> from datetime import datetime, date, timezone
1838
- >>> import polars.selectors as cs
1839
- >>> from zoneinfo import ZoneInfo
1840
- >>> tokyo_tz = ZoneInfo("Asia/Tokyo")
1841
- >>> utc_tz = timezone.utc
1842
- >>> df = pl.DataFrame(
1843
- ... {
1844
- ... "tstamp_tokyo": [
1845
- ... datetime(1999, 7, 21, 5, 20, 16, 987654, tzinfo=tokyo_tz),
1846
- ... datetime(2000, 5, 16, 6, 21, 21, 123465, tzinfo=tokyo_tz),
1847
- ... ],
1848
- ... "tstamp_utc": [
1849
- ... datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz),
1850
- ... datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz),
1851
- ... ],
1852
- ... "tstamp": [
1853
- ... datetime(2000, 11, 20, 18, 12, 16, 600000),
1854
- ... datetime(2020, 10, 30, 10, 20, 25, 123000),
1855
- ... ],
1856
- ... "dt": [date(1999, 12, 31), date(2010, 7, 5)],
1857
- ... },
1858
- ... schema_overrides={
1859
- ... "tstamp_tokyo": pl.Datetime("ns", "Asia/Tokyo"),
1860
- ... "tstamp_utc": pl.Datetime("us", "UTC"),
1861
- ... },
1862
- ... )
1863
-
1864
- Select all datetime columns:
1865
-
1866
- >>> df.select(cs.datetime())
1867
- shape: (2, 3)
1868
- ┌────────────────────────────────┬─────────────────────────────┬─────────────────────────┐
1869
- │ tstamp_tokyo ┆ tstamp_utc ┆ tstamp │
1870
- │ --- ┆ --- ┆ --- │
1871
- │ datetime[ns, Asia/Tokyo] ┆ datetime[μs, UTC] ┆ datetime[μs] │
1872
- ╞════════════════════════════════╪═════════════════════════════╪═════════════════════════╡
1873
- │ 1999-07-21 05:20:16.987654 JST ┆ 2023-04-10 12:14:16.999 UTC ┆ 2000-11-20 18:12:16.600 │
1874
- │ 2000-05-16 06:21:21.123465 JST ┆ 2025-08-25 14:18:22.666 UTC ┆ 2020-10-30 10:20:25.123 │
1875
- └────────────────────────────────┴─────────────────────────────┴─────────────────────────┘
1876
-
1877
- Select all datetime columns that have 'us' precision:
1878
-
1879
- >>> df.select(cs.datetime("us"))
1880
- shape: (2, 2)
1881
- ┌─────────────────────────────┬─────────────────────────┐
1882
- │ tstamp_utc ┆ tstamp │
1883
- │ --- ┆ --- │
1884
- │ datetime[μs, UTC] ┆ datetime[μs] │
1885
- ╞═════════════════════════════╪═════════════════════════╡
1886
- │ 2023-04-10 12:14:16.999 UTC ┆ 2000-11-20 18:12:16.600 │
1887
- │ 2025-08-25 14:18:22.666 UTC ┆ 2020-10-30 10:20:25.123 │
1888
- └─────────────────────────────┴─────────────────────────┘
1889
-
1890
- Select all datetime columns that have *any* timezone:
1891
-
1892
- >>> df.select(cs.datetime(time_zone="*"))
1893
- shape: (2, 2)
1894
- ┌────────────────────────────────┬─────────────────────────────┐
1895
- │ tstamp_tokyo ┆ tstamp_utc │
1896
- │ --- ┆ --- │
1897
- │ datetime[ns, Asia/Tokyo] ┆ datetime[μs, UTC] │
1898
- ╞════════════════════════════════╪═════════════════════════════╡
1899
- │ 1999-07-21 05:20:16.987654 JST ┆ 2023-04-10 12:14:16.999 UTC │
1900
- │ 2000-05-16 06:21:21.123465 JST ┆ 2025-08-25 14:18:22.666 UTC │
1901
- └────────────────────────────────┴─────────────────────────────┘
1902
-
1903
- Select all datetime columns that have a *specific* timezone:
1904
-
1905
- >>> df.select(cs.datetime(time_zone="UTC"))
1906
- shape: (2, 1)
1907
- ┌─────────────────────────────┐
1908
- │ tstamp_utc │
1909
- │ --- │
1910
- │ datetime[μs, UTC] │
1911
- ╞═════════════════════════════╡
1912
- │ 2023-04-10 12:14:16.999 UTC │
1913
- │ 2025-08-25 14:18:22.666 UTC │
1914
- └─────────────────────────────┘
1915
-
1916
- Select all datetime columns that have NO timezone:
1917
-
1918
- >>> df.select(cs.datetime(time_zone=None))
1919
- shape: (2, 1)
1920
- ┌─────────────────────────┐
1921
- │ tstamp │
1922
- │ --- │
1923
- │ datetime[μs] │
1924
- ╞═════════════════════════╡
1925
- │ 2000-11-20 18:12:16.600 │
1926
- │ 2020-10-30 10:20:25.123 │
1927
- └─────────────────────────┘
1928
-
1929
- Select all columns *except* for datetime columns:
1930
-
1931
- >>> df.select(~cs.datetime())
1932
- shape: (2, 1)
1933
- ┌────────────┐
1934
- │ dt │
1935
- │ --- │
1936
- │ date │
1937
- ╞════════════╡
1938
- │ 1999-12-31 │
1939
- │ 2010-07-05 │
1940
- └────────────┘
1941
- """ # noqa: W505
1942
- if time_unit is None:
1943
- time_unit_lst = ["ms", "us", "ns"]
1944
- else:
1945
- time_unit_lst = (
1946
- [time_unit] if isinstance(time_unit, str) else builtins.list(time_unit)
1947
- )
1948
-
1949
- time_zone_lst: builtins.list[str | pydatetime.timezone | None]
1950
- if time_zone is None:
1951
- time_zone_lst = [None]
1952
- elif time_zone:
1953
- time_zone_lst = (
1954
- [time_zone]
1955
- if isinstance(time_zone, (str, pydatetime.timezone))
1956
- else builtins.list(time_zone)
1957
- )
1958
- else:
- msg = f"invalid time_zone: {time_zone!r}"
- raise TypeError(msg)
-
1959
- return Selector._from_pyselector(PySelector.datetime(time_unit_lst, time_zone_lst))
1960
-
1961
-
1962
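The branches above expand `time_unit`/`time_zone` into list form before constructing the selector; the default `time_zone=("*", None)` therefore means "any timezone, or none". A simplified sketch of that normalisation (illustrative only; it ignores `datetime.timezone` instances and invalid-input handling):

>>> def normalize_datetime_args(time_unit=None, time_zone=("*", None)):
...     # sketch: expand a scalar or None into the list form used above
...     units = ["ms", "us", "ns"] if time_unit is None else (
...         [time_unit] if isinstance(time_unit, str) else list(time_unit)
...     )
...     zones = [None] if time_zone is None else (
...         [time_zone] if isinstance(time_zone, str) else list(time_zone)
...     )
...     return units, zones
>>> normalize_datetime_args()
(['ms', 'us', 'ns'], ['*', None])
>>> normalize_datetime_args("us", time_zone="UTC")
(['us'], ['UTC'])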
- def decimal() -> Selector:
1963
- """
1964
- Select all decimal columns.
1965
-
1966
- See Also
1967
- --------
1968
- float : Select all float columns.
1969
- integer : Select all integer columns.
1970
- numeric : Select all numeric columns.
1971
-
1972
- Examples
1973
- --------
1974
- >>> from decimal import Decimal as D
1975
- >>> import polars.selectors as cs
1976
- >>> df = pl.DataFrame(
1977
- ... {
1978
- ... "foo": ["x", "y"],
1979
- ... "bar": [D(123), D(456)],
1980
- ... "baz": [D("2.0005"), D("-50.5555")],
1981
- ... },
1982
- ... schema_overrides={"baz": pl.Decimal(scale=5, precision=10)},
1983
- ... )
1984
-
1985
- Select all decimal columns:
1986
-
1987
- >>> df.select(cs.decimal())
1988
- shape: (2, 2)
1989
- ┌───────────────┬───────────────┐
1990
- │ bar ┆ baz │
1991
- │ --- ┆ --- │
1992
- │ decimal[38,0] ┆ decimal[10,5] │
1993
- ╞═══════════════╪═══════════════╡
1994
- │ 123 ┆ 2.00050 │
1995
- │ 456 ┆ -50.55550 │
1996
- └───────────────┴───────────────┘
1997
-
1998
- Select all columns *except* the decimal ones:
1999
-
2000
- >>> df.select(~cs.decimal())
2001
- shape: (2, 1)
2002
- ┌─────┐
2003
- │ foo │
2004
- │ --- │
2005
- │ str │
2006
- ╞═════╡
2007
- │ x │
2008
- │ y │
2009
- └─────┘
2010
- """
2011
- # TODO: allow explicit selection by scale/precision?
2012
- return Selector._from_pyselector(PySelector.decimal())
2013
-
2014
-
2015
- def digit(ascii_only: bool = False) -> Selector: # noqa: FBT001
2016
- r"""
2017
- Select all columns having names consisting only of digits.
2018
-
2019
- Notes
2020
- -----
2021
- Matching column names cannot contain *any* non-digit characters. Note that the
2022
- definition of "digit" consists of all valid Unicode digit characters (`\d`)
2023
- by default; this can be changed by setting `ascii_only=True`.
2024
-
2025
- Examples
2026
- --------
2027
- >>> import polars as pl
2028
- >>> import polars.selectors as cs
2029
- >>> df = pl.DataFrame(
2030
- ... {
2031
- ... "key": ["aaa", "bbb", "aaa", "bbb", "bbb"],
2032
- ... "year": [2001, 2001, 2025, 2025, 2001],
2033
- ... "value": [-25, 100, 75, -15, -5],
2034
- ... }
2035
- ... ).pivot(
2036
- ... values="value",
2037
- ... index="key",
2038
- ... on="year",
2039
- ... aggregate_function="sum",
2040
- ... )
2041
- >>> print(df)
2042
- shape: (2, 3)
2043
- ┌─────┬──────┬──────┐
2044
- │ key ┆ 2001 ┆ 2025 │
2045
- │ --- ┆ --- ┆ --- │
2046
- │ str ┆ i64 ┆ i64 │
2047
- ╞═════╪══════╪══════╡
2048
- │ aaa ┆ -25 ┆ 75 │
2049
- │ bbb ┆ 95 ┆ -15 │
2050
- └─────┴──────┴──────┘
2051
-
2052
- Select columns with digit names:
2053
-
2054
- >>> df.select(cs.digit())
2055
- shape: (2, 2)
2056
- ┌──────┬──────┐
2057
- │ 2001 ┆ 2025 │
2058
- │ --- ┆ --- │
2059
- │ i64 ┆ i64 │
2060
- ╞══════╪══════╡
2061
- │ -25 ┆ 75 │
2062
- │ 95 ┆ -15 │
2063
- └──────┴──────┘
2064
-
2065
- Select all columns *except* for those with digit names:
2066
-
2067
- >>> df.select(~cs.digit())
2068
- shape: (2, 1)
2069
- ┌─────┐
2070
- │ key │
2071
- │ --- │
2072
- │ str │
2073
- ╞═════╡
2074
- │ aaa │
2075
- │ bbb │
2076
- └─────┘
2077
-
2078
- Demonstrate use of the `ascii_only` flag (by default all valid Unicode digits
2079
- are considered, but this can be constrained to ASCII 0-9):
2080
-
2081
- >>> df = pl.DataFrame({"१९९९": [1999], "२०७७": [2077], "3000": [3000]})
2082
- >>> df.select(cs.digit())
2083
- shape: (1, 3)
2084
- ┌──────┬──────┬──────┐
2085
- │ १९९९ ┆ २०७७ ┆ 3000 │
2086
- │ --- ┆ --- ┆ --- │
2087
- │ i64 ┆ i64 ┆ i64 │
2088
- ╞══════╪══════╪══════╡
2089
- │ 1999 ┆ 2077 ┆ 3000 │
2090
- └──────┴──────┴──────┘
2091
-
2092
- >>> df.select(cs.digit(ascii_only=True))
2093
- shape: (1, 1)
2094
- ┌──────┐
2095
- │ 3000 │
2096
- │ --- │
2097
- │ i64 │
2098
- ╞══════╡
2099
- │ 3000 │
2100
- └──────┘
2101
- """
2102
- re_digit = r"[0-9]" if ascii_only else r"\d"
2103
- return Selector._from_pyselector(PySelector.matches(rf"^{re_digit}+$"))
2104
-
2105
-
2106
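The `\d` versus `[0-9]` distinction above is standard `re` behaviour for `str` patterns, so it can be checked in isolation with the standard library:

>>> import re
>>> re.fullmatch(r"\d+", "१९९९") is not None
True
>>> re.fullmatch(r"[0-9]+", "१९९९") is not None
False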
- def duration(
2107
- time_unit: TimeUnit | Collection[TimeUnit] | None = None,
2108
- ) -> Selector:
2109
- """
2110
- Select all duration columns, optionally filtering by time unit.
2111
-
2112
- Parameters
2113
- ----------
2114
- time_unit
2115
- One (or more) of the allowed time unit precision strings: "ms", "us", or "ns".
2116
- Omit to select columns with any valid time unit.
2117
-
2118
- See Also
2119
- --------
2120
- date : Select all date columns.
2121
- datetime : Select all datetime columns, optionally filtering by time unit/zone.
2122
- temporal : Select all temporal columns.
2123
- time : Select all time columns.
2124
-
2125
- Examples
2126
- --------
2127
- >>> from datetime import date, timedelta
2128
- >>> import polars.selectors as cs
2129
- >>> df = pl.DataFrame(
2130
- ... {
2131
- ... "dt": [date(2022, 1, 31), date(2025, 7, 5)],
2132
- ... "td1": [
2133
- ... timedelta(days=1, milliseconds=123456),
2134
- ... timedelta(days=1, hours=23, microseconds=987000),
2135
- ... ],
2136
- ... "td2": [
2137
- ... timedelta(days=7, microseconds=456789),
2138
- ... timedelta(days=14, minutes=999, seconds=59),
2139
- ... ],
2140
- ... "td3": [
2141
- ... timedelta(weeks=4, days=-10, microseconds=999999),
2142
- ... timedelta(weeks=3, milliseconds=123456, microseconds=1),
2143
- ... ],
2144
- ... },
2145
- ... schema_overrides={
2146
- ... "td1": pl.Duration("ms"),
2147
- ... "td2": pl.Duration("us"),
2148
- ... "td3": pl.Duration("ns"),
2149
- ... },
2150
- ... )
2151
-
2152
- Select all duration columns:
2153
-
2154
- >>> df.select(cs.duration())
2155
- shape: (2, 3)
2156
- ┌────────────────┬─────────────────┬────────────────────┐
2157
- │ td1 ┆ td2 ┆ td3 │
2158
- │ --- ┆ --- ┆ --- │
2159
- │ duration[ms] ┆ duration[μs] ┆ duration[ns] │
2160
- ╞════════════════╪═════════════════╪════════════════════╡
2161
- │ 1d 2m 3s 456ms ┆ 7d 456789µs ┆ 18d 999999µs │
2162
- │ 1d 23h 987ms ┆ 14d 16h 39m 59s ┆ 21d 2m 3s 456001µs │
2163
- └────────────────┴─────────────────┴────────────────────┘
2164
-
2165
- Select all duration columns that have 'ms' precision:
2166
-
2167
- >>> df.select(cs.duration("ms"))
2168
- shape: (2, 1)
2169
- ┌────────────────┐
2170
- │ td1 │
2171
- │ --- │
2172
- │ duration[ms] │
2173
- ╞════════════════╡
2174
- │ 1d 2m 3s 456ms │
2175
- │ 1d 23h 987ms │
2176
- └────────────────┘
2177
-
2178
- Select all duration columns that have 'ms' OR 'ns' precision:
2179
-
2180
- >>> df.select(cs.duration(["ms", "ns"]))
2181
- shape: (2, 2)
2182
- ┌────────────────┬────────────────────┐
2183
- │ td1 ┆ td3 │
2184
- │ --- ┆ --- │
2185
- │ duration[ms] ┆ duration[ns] │
2186
- ╞════════════════╪════════════════════╡
2187
- │ 1d 2m 3s 456ms ┆ 18d 999999µs │
2188
- │ 1d 23h 987ms ┆ 21d 2m 3s 456001µs │
2189
- └────────────────┴────────────────────┘
2190
-
2191
- Select all columns *except* for duration columns:
2192
-
2193
- >>> df.select(~cs.duration())
2194
- shape: (2, 1)
2195
- ┌────────────┐
2196
- │ dt │
2197
- │ --- │
2198
- │ date │
2199
- ╞════════════╡
2200
- │ 2022-01-31 │
2201
- │ 2025-07-05 │
2202
- └────────────┘
2203
- """
2204
- if time_unit is None:
2205
- time_unit = ["ms", "us", "ns"]
2206
- else:
2207
- time_unit = (
2208
- [time_unit] if isinstance(time_unit, str) else builtins.list(time_unit)
2209
- )
2210
-
2211
- return Selector._from_pyselector(PySelector.duration(time_unit))
2212
-
2213
-
2214
- def ends_with(*suffix: str) -> Selector:
2215
- """
2216
- Select columns that end with the given substring(s).
2217
-
2218
- See Also
2219
- --------
2220
- contains : Select columns that contain the given literal substring(s).
2221
- matches : Select all columns that match the given regex pattern.
2222
- starts_with : Select columns that start with the given substring(s).
2223
-
2224
- Parameters
2225
- ----------
2226
- suffix
2227
- Substring(s) that matching column names should end with.
2228
-
2229
- Examples
2230
- --------
2231
- >>> import polars.selectors as cs
2232
- >>> df = pl.DataFrame(
2233
- ... {
2234
- ... "foo": ["x", "y"],
2235
- ... "bar": [123, 456],
2236
- ... "baz": [2.0, 5.5],
2237
- ... "zap": [False, True],
2238
- ... }
2239
- ... )
2240
-
2241
- Select columns that end with the substring 'z':
2242
-
2243
- >>> df.select(cs.ends_with("z"))
2244
- shape: (2, 1)
2245
- ┌─────┐
2246
- │ baz │
2247
- │ --- │
2248
- │ f64 │
2249
- ╞═════╡
2250
- │ 2.0 │
2251
- │ 5.5 │
2252
- └─────┘
2253
-
2254
- Select columns that end with *either* the letter 'z' or 'r':
2255
-
2256
- >>> df.select(cs.ends_with("z", "r"))
2257
- shape: (2, 2)
2258
- ┌─────┬─────┐
2259
- │ bar ┆ baz │
2260
- │ --- ┆ --- │
2261
- │ i64 ┆ f64 │
2262
- ╞═════╪═════╡
2263
- │ 123 ┆ 2.0 │
2264
- │ 456 ┆ 5.5 │
2265
- └─────┴─────┘
2266
-
2267
- Select all columns *except* for those that end with the substring 'z':
2268
-
2269
- >>> df.select(~cs.ends_with("z"))
2270
- shape: (2, 3)
2271
- ┌─────┬─────┬───────┐
2272
- │ foo ┆ bar ┆ zap │
2273
- │ --- ┆ --- ┆ --- │
2274
- │ str ┆ i64 ┆ bool │
2275
- ╞═════╪═════╪═══════╡
2276
- │ x ┆ 123 ┆ false │
2277
- │ y ┆ 456 ┆ true │
2278
- └─────┴─────┴───────┘
2279
- """
2280
- escaped_suffix = _re_string(suffix)
2281
- raw_params = f"^.*{escaped_suffix}$"
2282
-
2283
- return Selector._from_pyselector(PySelector.matches(raw_params))
2284
-
2285
-
2286
- def exclude(
2287
- columns: (
2288
- str
2289
- | PolarsDataType
2290
- | Selector
2291
- | Expr
2292
- | Collection[str | PolarsDataType | Selector | Expr]
2293
- ),
2294
- *more_columns: str | PolarsDataType | Selector | Expr,
2295
- ) -> Selector:
2296
- """
2297
- Select all columns except those matching the given columns, datatypes, or selectors.
2298
-
2299
- Parameters
2300
- ----------
2301
- columns
2302
- One or more columns (specified by name or expression), datatypes, or selectors
2303
- representing the columns to exclude.
2304
- *more_columns
2305
- Additional columns, datatypes, or selectors to exclude, specified as positional
2306
- arguments.
2307
-
2308
- Notes
2309
- -----
2310
- If excluding a single selector it is simpler to write as `~selector` instead.
2311
-
2312
- Examples
2313
- --------
2314
- Exclude by column name(s):
2315
-
2316
- >>> import polars.selectors as cs
2317
- >>> df = pl.DataFrame(
2318
- ... {
2319
- ... "aa": [1, 2, 3],
2320
- ... "ba": ["a", "b", None],
2321
- ... "cc": [None, 2.5, 1.5],
2322
- ... }
2323
- ... )
2324
- >>> df.select(cs.exclude("ba", "xx"))
2325
- shape: (3, 2)
2326
- ┌─────┬──────┐
2327
- │ aa ┆ cc │
2328
- │ --- ┆ --- │
2329
- │ i64 ┆ f64 │
2330
- ╞═════╪══════╡
2331
- │ 1 ┆ null │
2332
- │ 2 ┆ 2.5 │
2333
- │ 3 ┆ 1.5 │
2334
- └─────┴──────┘
2335
-
2336
- Exclude using a column name, a selector, and a dtype:
2337
-
2338
- >>> df.select(cs.exclude("aa", cs.string(), pl.UInt32))
2339
- shape: (3, 1)
2340
- ┌──────┐
2341
- │ cc │
2342
- │ --- │
2343
- │ f64 │
2344
- ╞══════╡
2345
- │ null │
2346
- │ 2.5 │
2347
- │ 1.5 │
2348
- └──────┘
2349
- """
2350
- return ~_combine_as_selector(columns, *more_columns)
2351
-
2352
-
2353
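As the note above says, excluding a single selector is more idiomatically written with `~`. With the same frame as in the example, both forms below should yield the same columns (illustrative usage only):

>>> df.select(cs.exclude(cs.string())).columns
['aa', 'cc']
>>> df.select(~cs.string()).columns
['aa', 'cc']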
- def first(*, strict: bool = True) -> Selector:
2354
- """
2355
- Select the first column in the current scope.
2356
-
2357
- See Also
2358
- --------
2359
- all : Select all columns.
2360
- last : Select the last column in the current scope.
2361
-
2362
- Examples
2363
- --------
2364
- >>> import polars.selectors as cs
2365
- >>> df = pl.DataFrame(
2366
- ... {
2367
- ... "foo": ["x", "y"],
2368
- ... "bar": [123, 456],
2369
- ... "baz": [2.0, 5.5],
2370
- ... "zap": [0, 1],
2371
- ... }
2372
- ... )
2373
-
2374
- Select the first column:
2375
-
2376
- >>> df.select(cs.first())
2377
- shape: (2, 1)
2378
- ┌─────┐
2379
- │ foo │
2380
- │ --- │
2381
- │ str │
2382
- ╞═════╡
2383
- │ x │
2384
- │ y │
2385
- └─────┘
2386
-
2387
- Select everything *except* for the first column:
2388
-
2389
- >>> df.select(~cs.first())
2390
- shape: (2, 3)
2391
- ┌─────┬─────┬─────┐
2392
- │ bar ┆ baz ┆ zap │
2393
- │ --- ┆ --- ┆ --- │
2394
- │ i64 ┆ f64 ┆ i64 │
2395
- ╞═════╪═════╪═════╡
2396
- │ 123 ┆ 2.0 ┆ 0 │
2397
- │ 456 ┆ 5.5 ┆ 1 │
2398
- └─────┴─────┴─────┘
2399
- """
2400
- return Selector._from_pyselector(PySelector.first(strict))
2401
-
2402
-
2403
- def float() -> Selector:
2404
- """
2405
- Select all float columns.
2406
-
2407
- See Also
2408
- --------
2409
- integer : Select all integer columns.
2410
- numeric : Select all numeric columns.
2411
- signed_integer : Select all signed integer columns.
2412
- unsigned_integer : Select all unsigned integer columns.
2413
-
2414
- Examples
2415
- --------
2416
- >>> import polars.selectors as cs
2417
- >>> df = pl.DataFrame(
2418
- ... {
2419
- ... "foo": ["x", "y"],
2420
- ... "bar": [123, 456],
2421
- ... "baz": [2.0, 5.5],
2422
- ... "zap": [0.0, 1.0],
2423
- ... },
2424
- ... schema_overrides={"baz": pl.Float32, "zap": pl.Float64},
2425
- ... )
2426
-
2427
- Select all float columns:
2428
-
2429
- >>> df.select(cs.float())
2430
- shape: (2, 2)
2431
- ┌─────┬─────┐
2432
- │ baz ┆ zap │
2433
- │ --- ┆ --- │
2434
- │ f32 ┆ f64 │
2435
- ╞═════╪═════╡
2436
- │ 2.0 ┆ 0.0 │
2437
- │ 5.5 ┆ 1.0 │
2438
- └─────┴─────┘
2439
-
2440
- Select all columns *except* for those that are float:
2441
-
2442
- >>> df.select(~cs.float())
2443
- shape: (2, 2)
2444
- ┌─────┬─────┐
2445
- │ foo ┆ bar │
2446
- │ --- ┆ --- │
2447
- │ str ┆ i64 │
2448
- ╞═════╪═════╡
2449
- │ x ┆ 123 │
2450
- │ y ┆ 456 │
2451
- └─────┴─────┘
2452
- """
2453
- return Selector._from_pyselector(PySelector.float())
2454
-
2455
-
2456
- def integer() -> Selector:
2457
- """
2458
- Select all integer columns.
2459
-
2460
- See Also
2461
- --------
2462
- by_dtype : Select columns by dtype.
2463
- float : Select all float columns.
2464
- numeric : Select all numeric columns.
2465
- signed_integer : Select all signed integer columns.
2466
- unsigned_integer : Select all unsigned integer columns.
2467
-
2468
- Examples
2469
- --------
2470
- >>> import polars.selectors as cs
2471
- >>> df = pl.DataFrame(
2472
- ... {
2473
- ... "foo": ["x", "y"],
2474
- ... "bar": [123, 456],
2475
- ... "baz": [2.0, 5.5],
2476
- ... "zap": [0, 1],
2477
- ... }
2478
- ... )
2479
-
2480
- Select all integer columns:
2481
-
2482
- >>> df.select(cs.integer())
2483
- shape: (2, 2)
2484
- ┌─────┬─────┐
2485
- │ bar ┆ zap │
2486
- │ --- ┆ --- │
2487
- │ i64 ┆ i64 │
2488
- ╞═════╪═════╡
2489
- │ 123 ┆ 0 │
2490
- │ 456 ┆ 1 │
2491
- └─────┴─────┘
2492
-
2493
- Select all columns *except* for those that are integer:
2494
-
2495
- >>> df.select(~cs.integer())
2496
- shape: (2, 2)
2497
- ┌─────┬─────┐
2498
- │ foo ┆ baz │
2499
- │ --- ┆ --- │
2500
- │ str ┆ f64 │
2501
- ╞═════╪═════╡
2502
- │ x ┆ 2.0 │
2503
- │ y ┆ 5.5 │
2504
- └─────┴─────┘
2505
- """
2506
- return Selector._from_pyselector(PySelector.integer())
2507
-
2508
-
2509
- def signed_integer() -> Selector:
2510
- """
2511
- Select all signed integer columns.
2512
-
2513
- See Also
2514
- --------
2515
- by_dtype : Select columns by dtype.
2516
- float : Select all float columns.
2517
- integer : Select all integer columns.
2518
- numeric : Select all numeric columns.
2519
- unsigned_integer : Select all unsigned integer columns.
2520
-
2521
- Examples
2522
- --------
2523
- >>> import polars.selectors as cs
2524
- >>> df = pl.DataFrame(
2525
- ... {
2526
- ... "foo": [-123, -456],
2527
- ... "bar": [3456, 6789],
2528
- ... "baz": [7654, 4321],
2529
- ... "zap": ["ab", "cd"],
2530
- ... },
2531
- ... schema_overrides={"bar": pl.UInt32, "baz": pl.UInt64},
2532
- ... )
2533
-
2534
- Select all signed integer columns:
2535
-
2536
- >>> df.select(cs.signed_integer())
2537
- shape: (2, 1)
2538
- ┌──────┐
2539
- │ foo │
2540
- │ --- │
2541
- │ i64 │
2542
- ╞══════╡
2543
- │ -123 │
2544
- │ -456 │
2545
- └──────┘
2546
-
2547
- >>> df.select(~cs.signed_integer())
2548
- shape: (2, 3)
2549
- ┌──────┬──────┬─────┐
2550
- │ bar ┆ baz ┆ zap │
2551
- │ --- ┆ --- ┆ --- │
2552
- │ u32 ┆ u64 ┆ str │
2553
- ╞══════╪══════╪═════╡
2554
- │ 3456 ┆ 7654 ┆ ab │
2555
- │ 6789 ┆ 4321 ┆ cd │
2556
- └──────┴──────┴─────┘
2557
-
2558
- Select all integer columns (both signed and unsigned):
2559
-
2560
- >>> df.select(cs.integer())
2561
- shape: (2, 3)
2562
- ┌──────┬──────┬──────┐
2563
- │ foo ┆ bar ┆ baz │
2564
- │ --- ┆ --- ┆ --- │
2565
- │ i64 ┆ u32 ┆ u64 │
2566
- ╞══════╪══════╪══════╡
2567
- │ -123 ┆ 3456 ┆ 7654 │
2568
- │ -456 ┆ 6789 ┆ 4321 │
2569
- └──────┴──────┴──────┘
2570
- """
2571
- return Selector._from_pyselector(PySelector.signed_integer())
2572
-
2573
-
2574
- def unsigned_integer() -> Selector:
2575
- """
2576
- Select all unsigned integer columns.
2577
-
2578
- See Also
2579
- --------
2580
- by_dtype : Select columns by dtype.
2581
- float : Select all float columns.
2582
- integer : Select all integer columns.
2583
- numeric : Select all numeric columns.
2584
- signed_integer : Select all signed integer columns.
2585
-
2586
- Examples
2587
- --------
2588
- >>> import polars.selectors as cs
2589
- >>> df = pl.DataFrame(
2590
- ... {
2591
- ... "foo": [-123, -456],
2592
- ... "bar": [3456, 6789],
2593
- ... "baz": [7654, 4321],
2594
- ... "zap": ["ab", "cd"],
2595
- ... },
2596
- ... schema_overrides={"bar": pl.UInt32, "baz": pl.UInt64},
2597
- ... )
2598
-
2599
- Select all unsigned integer columns:
2600
-
2601
- >>> df.select(cs.unsigned_integer())
2602
- shape: (2, 2)
2603
- ┌──────┬──────┐
2604
- │ bar ┆ baz │
2605
- │ --- ┆ --- │
2606
- │ u32 ┆ u64 │
2607
- ╞══════╪══════╡
2608
- │ 3456 ┆ 7654 │
2609
- │ 6789 ┆ 4321 │
2610
- └──────┴──────┘
2611
-
2612
- Select all columns *except* for those that are unsigned integers:
2613
-
2614
- >>> df.select(~cs.unsigned_integer())
2615
- shape: (2, 2)
2616
- ┌──────┬─────┐
2617
- │ foo ┆ zap │
2618
- │ --- ┆ --- │
2619
- │ i64 ┆ str │
2620
- ╞══════╪═════╡
2621
- │ -123 ┆ ab │
2622
- │ -456 ┆ cd │
2623
- └──────┴─────┘
2624
-
2625
- Select all integer columns (both signed and unsigned):
2626
-
2627
- >>> df.select(cs.integer())
2628
- shape: (2, 3)
2629
- ┌──────┬──────┬──────┐
2630
- │ foo ┆ bar ┆ baz │
2631
- │ --- ┆ --- ┆ --- │
2632
- │ i64 ┆ u32 ┆ u64 │
2633
- ╞══════╪══════╪══════╡
2634
- │ -123 ┆ 3456 ┆ 7654 │
2635
- │ -456 ┆ 6789 ┆ 4321 │
2636
- └──────┴──────┴──────┘
2637
- """
2638
- return Selector._from_pyselector(PySelector.unsigned_integer())
2639
-
2640
-
2641
- def last(*, strict: bool = True) -> Selector:
2642
- """
2643
- Select the last column in the current scope.
2644
-
2645
- See Also
2646
- --------
2647
- all : Select all columns.
2648
- first : Select the first column in the current scope.
2649
-
2650
- Examples
2651
- --------
2652
- >>> import polars.selectors as cs
2653
- >>> df = pl.DataFrame(
2654
- ... {
2655
- ... "foo": ["x", "y"],
2656
- ... "bar": [123, 456],
2657
- ... "baz": [2.0, 5.5],
2658
- ... "zap": [0, 1],
2659
- ... }
2660
- ... )
2661
-
2662
- Select the last column:
2663
-
2664
- >>> df.select(cs.last())
2665
- shape: (2, 1)
2666
- ┌─────┐
2667
- │ zap │
2668
- │ --- │
2669
- │ i64 │
2670
- ╞═════╡
2671
- │ 0 │
2672
- │ 1 │
2673
- └─────┘
2674
-
2675
- Select everything *except* for the last column:
2676
-
2677
- >>> df.select(~cs.last())
2678
- shape: (2, 3)
2679
- ┌─────┬─────┬─────┐
2680
- │ foo ┆ bar ┆ baz │
2681
- │ --- ┆ --- ┆ --- │
2682
- │ str ┆ i64 ┆ f64 │
2683
- ╞═════╪═════╪═════╡
2684
- │ x ┆ 123 ┆ 2.0 │
2685
- │ y ┆ 456 ┆ 5.5 │
2686
- └─────┴─────┴─────┘
2687
- """
2688
- return Selector._from_pyselector(PySelector.last(strict))
2689
-
2690
-
2691
- def matches(pattern: str) -> Selector:
2692
- """
2693
- Select all columns that match the given regex pattern.
2694
-
2695
- See Also
2696
- --------
2697
- contains : Select all columns that contain the given substring.
2698
- ends_with : Select all columns that end with the given substring(s).
2699
- starts_with : Select all columns that start with the given substring(s).
2700
-
2701
- Parameters
2702
- ----------
2703
- pattern
2704
- A valid regular expression pattern, compatible with the `regex crate
2705
- <https://docs.rs/regex/latest/regex/>`_.
2706
-
2707
- Examples
2708
- --------
2709
- >>> import polars.selectors as cs
2710
- >>> df = pl.DataFrame(
2711
- ... {
2712
- ... "foo": ["x", "y"],
2713
- ... "bar": [123, 456],
2714
- ... "baz": [2.0, 5.5],
2715
- ... "zap": [0, 1],
2716
- ... }
2717
- ... )
2718
-
2719
- Match column names containing an 'a', preceded by a character that is not 'z':
2720
-
2721
- >>> df.select(cs.matches("[^z]a"))
2722
- shape: (2, 2)
2723
- ┌─────┬─────┐
2724
- │ bar ┆ baz │
2725
- │ --- ┆ --- │
2726
- │ i64 ┆ f64 │
2727
- ╞═════╪═════╡
2728
- │ 123 ┆ 2.0 │
2729
- │ 456 ┆ 5.5 │
2730
- └─────┴─────┘
2731
-
2732
- Do not match column names ending in 'R' or 'z' (case-insensitively):
2733
-
2734
- >>> df.select(~cs.matches(r"(?i)R|z$"))
2735
- shape: (2, 2)
2736
- ┌─────┬─────┐
2737
- │ foo ┆ zap │
2738
- │ --- ┆ --- │
2739
- │ str ┆ i64 │
2740
- ╞═════╪═════╡
2741
- │ x ┆ 0 │
2742
- │ y ┆ 1 │
2743
- └─────┴─────┘
2744
- """
2745
- if pattern == ".*":
2746
- return all()
2747
- else:
2748
- if pattern.startswith(".*"):
2749
- pattern = pattern[2:]
2750
- elif pattern.endswith(".*"):
2751
- pattern = pattern[:-2]
2752
-
2753
- pfx = "^.*" if not pattern.startswith("^") else ""
2754
- sfx = ".*$" if not pattern.endswith("$") else ""
2755
- raw_params = f"{pfx}{pattern}{sfx}"
2756
-
2757
- return Selector._from_pyselector(PySelector.matches(raw_params))
2758
-
2759
-
2760
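The normalisation above anchors a bare pattern on both sides so that `matches` behaves like a search over column names rather than a full match (the `.*` → `all()` shortcut aside). A small standalone helper that mirrors it, for illustration only:

>>> def normalize(pattern: str) -> str:
...     # mirror the branches above: trim a redundant leading/trailing wildcard, then anchor
...     if pattern.startswith(".*"):
...         pattern = pattern[2:]
...     elif pattern.endswith(".*"):
...         pattern = pattern[:-2]
...     pfx = "^.*" if not pattern.startswith("^") else ""
...     sfx = ".*$" if not pattern.endswith("$") else ""
...     return f"{pfx}{pattern}{sfx}"
>>> normalize("[^z]a")
'^.*[^z]a.*$'
>>> normalize("^foo$")
'^foo$'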
- def numeric() -> Selector:
2761
- """
2762
- Select all numeric columns.
2763
-
2764
- See Also
2765
- --------
2766
- by_dtype : Select columns by dtype.
2767
- float : Select all float columns.
2768
- integer : Select all integer columns.
2769
- signed_integer : Select all signed integer columns.
2770
- unsigned_integer : Select all unsigned integer columns.
2771
-
2772
- Examples
2773
- --------
2774
- >>> import polars.selectors as cs
2775
- >>> df = pl.DataFrame(
2776
- ... {
2777
- ... "foo": ["x", "y"],
2778
- ... "bar": [123, 456],
2779
- ... "baz": [2.0, 5.5],
2780
- ... "zap": [0, 0],
2781
- ... },
2782
- ... schema_overrides={"bar": pl.Int16, "baz": pl.Float32, "zap": pl.UInt8},
2783
- ... )
2784
-
2785
- Match all numeric columns:
2786
-
2787
- >>> df.select(cs.numeric())
2788
- shape: (2, 3)
2789
- ┌─────┬─────┬─────┐
2790
- │ bar ┆ baz ┆ zap │
2791
- │ --- ┆ --- ┆ --- │
2792
- │ i16 ┆ f32 ┆ u8 │
2793
- ╞═════╪═════╪═════╡
2794
- │ 123 ┆ 2.0 ┆ 0 │
2795
- │ 456 ┆ 5.5 ┆ 0 │
2796
- └─────┴─────┴─────┘
2797
-
2798
- Match all columns *except* for those that are numeric:
2799
-
2800
- >>> df.select(~cs.numeric())
2801
- shape: (2, 1)
2802
- ┌─────┐
2803
- │ foo │
2804
- │ --- │
2805
- │ str │
2806
- ╞═════╡
2807
- │ x │
2808
- │ y │
2809
- └─────┘
2810
- """
2811
- return Selector._from_pyselector(PySelector.numeric())
2812
-
2813
-
2814
- def object() -> Selector:
2815
- """
2816
- Select all object columns.
2817
-
2818
- See Also
2819
- --------
2820
- by_dtype : Select columns by dtype.
2821
-
2822
- Examples
2823
- --------
2824
- >>> import polars.selectors as cs
2825
- >>> from uuid import uuid4
2826
- >>> with pl.Config(fmt_str_lengths=36):
2827
- ... df = pl.DataFrame(
2828
- ... {
2829
- ... "idx": [0, 1],
2830
- ... "uuid_obj": [uuid4(), uuid4()],
2831
- ... "uuid_str": [str(uuid4()), str(uuid4())],
2832
- ... },
2833
- ... schema_overrides={"idx": pl.Int32},
2834
- ... )
2835
- ... print(df) # doctest: +IGNORE_RESULT
2836
- shape: (2, 3)
2837
- ┌─────┬──────────────────────────────────────┬──────────────────────────────────────┐
2838
- │ idx ┆ uuid_obj ┆ uuid_str │
2839
- │ --- ┆ --- ┆ --- │
2840
- │ i32 ┆ object ┆ str │
2841
- ╞═════╪══════════════════════════════════════╪══════════════════════════════════════╡
2842
- │ 0 ┆ 6be063cf-c9c6-43be-878e-e446cfd42981 ┆ acab9fea-c05d-4b91-b639-418004a63f33 │
2843
- │ 1 ┆ 7849d8f9-2cac-48e7-96d3-63cf81c14869 ┆ 28c65415-8b7d-4857-a4ce-300dca14b12b │
2844
- └─────┴──────────────────────────────────────┴──────────────────────────────────────┘
2845
-
2846
- Select object columns and export as a dict:
2847
-
2848
- >>> df.select(cs.object()).to_dict(as_series=False) # doctest: +IGNORE_RESULT
2849
- {
2850
- "uuid_obj": [
2851
- UUID("6be063cf-c9c6-43be-878e-e446cfd42981"),
2852
- UUID("7849d8f9-2cac-48e7-96d3-63cf81c14869"),
2853
- ]
2854
- }
2855
-
2856
- Select all columns *except* for those that are object and export as dict:
2857
-
2858
- >>> df.select(~cs.object()).to_dict(as_series=False)  # doctest: +IGNORE_RESULT
2859
- {
2860
- "idx": [0, 1],
2861
- "uuid_str": [
2862
- "acab9fea-c05d-4b91-b639-418004a63f33",
2863
- "28c65415-8b7d-4857-a4ce-300dca14b12b",
2864
- ],
2865
- }
2866
- """ # noqa: W505
2867
- return Selector._from_pyselector(PySelector.object())
2868
-
2869
-
2870
- def starts_with(*prefix: str) -> Selector:
2871
- """
2872
- Select columns that start with the given substring(s).
2873
-
2874
- Parameters
2875
- ----------
2876
- prefix
2877
- Substring(s) that matching column names should start with.
2878
-
2879
- See Also
2880
- --------
2881
- contains : Select all columns that contain the given substring.
2882
- ends_with : Select all columns that end with the given substring(s).
2883
- matches : Select all columns that match the given regex pattern.
2884
-
2885
- Examples
2886
- --------
2887
- >>> import polars.selectors as cs
2888
- >>> df = pl.DataFrame(
2889
- ... {
2890
- ... "foo": [1.0, 2.0],
2891
- ... "bar": [3.0, 4.0],
2892
- ... "baz": [5, 6],
2893
- ... "zap": [7, 8],
2894
- ... }
2895
- ... )
2896
-
2897
- Match columns starting with a 'b':
2898
-
2899
- >>> df.select(cs.starts_with("b"))
2900
- shape: (2, 2)
2901
- ┌─────┬─────┐
2902
- │ bar ┆ baz │
2903
- │ --- ┆ --- │
2904
- │ f64 ┆ i64 │
2905
- ╞═════╪═════╡
2906
- │ 3.0 ┆ 5 │
2907
- │ 4.0 ┆ 6 │
2908
- └─────┴─────┘
2909
-
2910
- Match columns starting with *either* the letter 'b' or 'z':
2911
-
2912
- >>> df.select(cs.starts_with("b", "z"))
2913
- shape: (2, 3)
2914
- ┌─────┬─────┬─────┐
2915
- │ bar ┆ baz ┆ zap │
2916
- │ --- ┆ --- ┆ --- │
2917
- │ f64 ┆ i64 ┆ i64 │
2918
- ╞═════╪═════╪═════╡
2919
- │ 3.0 ┆ 5 ┆ 7 │
2920
- │ 4.0 ┆ 6 ┆ 8 │
2921
- └─────┴─────┴─────┘
2922
-
2923
- Match all columns *except* for those starting with 'b':
2924
-
2925
- >>> df.select(~cs.starts_with("b"))
2926
- shape: (2, 2)
2927
- ┌─────┬─────┐
2928
- │ foo ┆ zap │
2929
- │ --- ┆ --- │
2930
- │ f64 ┆ i64 │
2931
- ╞═════╪═════╡
2932
- │ 1.0 ┆ 7 │
2933
- │ 2.0 ┆ 8 │
2934
- └─────┴─────┘
2935
- """
2936
- escaped_prefix = _re_string(prefix)
2937
- raw_params = f"^{escaped_prefix}.*$"
2938
-
2939
- return Selector._from_pyselector(PySelector.matches(raw_params))
2940
-
2941
-
2942
- def string(*, include_categorical: bool = False) -> Selector:
2943
- """
2944
- Select all String columns (and, optionally, Categorical columns).
2945
-
2946
- See Also
2947
- --------
2948
- binary : Select all binary columns.
2949
- by_dtype : Select all columns matching the given dtype(s).
2950
- categorical : Select all categorical columns.
2951
-
2952
- Examples
2953
- --------
2954
- >>> import polars.selectors as cs
2955
- >>> df = pl.DataFrame(
2956
- ... {
2957
- ... "w": ["xx", "yy", "xx", "yy", "xx"],
2958
- ... "x": [1, 2, 1, 4, -2],
2959
- ... "y": [3.0, 4.5, 1.0, 2.5, -2.0],
2960
- ... "z": ["a", "b", "a", "b", "b"],
2961
- ... },
2962
- ... ).with_columns(
2963
- ... z=pl.col("z").cast(pl.Categorical("lexical")),
2964
- ... )
2965
-
2966
- Group by all string columns, sum the numeric columns, then sort by the string columns:
2967
-
2968
- >>> df.group_by(cs.string()).agg(cs.numeric().sum()).sort(by=cs.string())
2969
- shape: (2, 3)
2970
- ┌─────┬─────┬─────┐
2971
- │ w ┆ x ┆ y │
2972
- │ --- ┆ --- ┆ --- │
2973
- │ str ┆ i64 ┆ f64 │
2974
- ╞═════╪═════╪═════╡
2975
- │ xx ┆ 0 ┆ 2.0 │
2976
- │ yy ┆ 6 ┆ 7.0 │
2977
- └─────┴─────┴─────┘
2978
-
2979
- Group by all string *and* categorical columns:
2980
-
2981
- >>> df.group_by(cs.string(include_categorical=True)).agg(cs.numeric().sum()).sort(
2982
- ... by=cs.string(include_categorical=True)
2983
- ... )
2984
- shape: (3, 4)
2985
- ┌─────┬─────┬─────┬──────┐
2986
- │ w ┆ z ┆ x ┆ y │
2987
- │ --- ┆ --- ┆ --- ┆ --- │
2988
- │ str ┆ cat ┆ i64 ┆ f64 │
2989
- ╞═════╪═════╪═════╪══════╡
2990
- │ xx ┆ a ┆ 2 ┆ 4.0 │
2991
- │ xx ┆ b ┆ -2 ┆ -2.0 │
2992
- │ yy ┆ b ┆ 6 ┆ 7.0 │
2993
- └─────┴─────┴─────┴──────┘
2994
- """
2995
- string_dtypes: builtins.list[PolarsDataType] = [String]
2996
- if include_categorical:
2997
- string_dtypes.append(Categorical)
2998
-
2999
- return by_dtype(string_dtypes)
3000
-
3001
-
3002
- def temporal() -> Selector:
3003
- """
3004
- Select all temporal columns.
3005
-
3006
- See Also
3007
- --------
3008
- by_dtype : Select all columns matching the given dtype(s).
3009
- date : Select all date columns.
3010
- datetime : Select all datetime columns, optionally filtering by time unit/zone.
3011
- duration : Select all duration columns, optionally filtering by time unit.
3012
- time : Select all time columns.
3013
-
3014
- Examples
3015
- --------
3016
- >>> from datetime import date, time
3017
- >>> import polars.selectors as cs
3018
- >>> df = pl.DataFrame(
3019
- ... {
3020
- ... "dt": [date(2021, 1, 1), date(2021, 1, 2)],
3021
- ... "tm": [time(12, 0, 0), time(20, 30, 45)],
3022
- ... "value": [1.2345, 2.3456],
3023
- ... }
3024
- ... )
3025
-
3026
- Match all temporal columns:
3027
-
3028
- >>> df.select(cs.temporal())
3029
- shape: (2, 2)
3030
- ┌────────────┬──────────┐
3031
- │ dt ┆ tm │
3032
- │ --- ┆ --- │
3033
- │ date ┆ time │
3034
- ╞════════════╪══════════╡
3035
- │ 2021-01-01 ┆ 12:00:00 │
3036
- │ 2021-01-02 ┆ 20:30:45 │
3037
- └────────────┴──────────┘
3038
-
3039
- Match all temporal columns *except* for time columns:
3040
-
3041
- >>> df.select(cs.temporal() - cs.time())
3042
- shape: (2, 1)
3043
- ┌────────────┐
3044
- │ dt │
3045
- │ --- │
3046
- │ date │
3047
- ╞════════════╡
3048
- │ 2021-01-01 │
3049
- │ 2021-01-02 │
3050
- └────────────┘
3051
-
3052
- Match all columns *except* for temporal columns:
3053
-
3054
- >>> df.select(~cs.temporal())
3055
- shape: (2, 1)
3056
- ┌────────┐
3057
- │ value │
3058
- │ --- │
3059
- │ f64 │
3060
- ╞════════╡
3061
- │ 1.2345 │
3062
- │ 2.3456 │
3063
- └────────┘
3064
- """
3065
- return Selector._from_pyselector(PySelector.temporal())
3066
-
3067
-
3068
- def time() -> Selector:
3069
- """
3070
- Select all time columns.
3071
-
3072
- See Also
3073
- --------
3074
- date : Select all date columns.
3075
- datetime : Select all datetime columns, optionally filtering by time unit/zone.
3076
- duration : Select all duration columns, optionally filtering by time unit.
3077
- temporal : Select all temporal columns.
3078
-
3079
- Examples
3080
- --------
3081
- >>> from datetime import date, datetime, time
3082
- >>> import polars.selectors as cs
3083
- >>> df = pl.DataFrame(
3084
- ... {
3085
- ... "dtm": [datetime(2001, 5, 7, 10, 25), datetime(2031, 12, 31, 0, 30)],
3086
- ... "dt": [date(1999, 12, 31), date(2024, 8, 9)],
3087
- ... "tm": [time(0, 0, 0), time(23, 59, 59)],
3088
- ... },
3089
- ... )
3090
-
3091
- Select all time columns:
3092
-
3093
- >>> df.select(cs.time())
3094
- shape: (2, 1)
3095
- ┌──────────┐
3096
- │ tm │
3097
- │ --- │
3098
- │ time │
3099
- ╞══════════╡
3100
- │ 00:00:00 │
3101
- │ 23:59:59 │
3102
- └──────────┘
3103
-
3104
- Select all columns *except* for those that are times:
3105
-
3106
- >>> df.select(~cs.time())
3107
- shape: (2, 2)
3108
- ┌─────────────────────┬────────────┐
3109
- │ dtm ┆ dt │
3110
- │ --- ┆ --- │
3111
- │ datetime[μs] ┆ date │
3112
- ╞═════════════════════╪════════════╡
3113
- │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
3114
- │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
3115
- └─────────────────────┴────────────┘
3116
- """
3117
- return by_dtype([Time])