polars-runtime-compat 1.34.0b3-cp39-abi3-win_amd64.whl → 1.34.0b5-cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic: the entire pure-Python `polars/` package tree is deleted in 1.34.0b5, leaving only the compiled `_polars_runtime_compat.pyd` extension and regenerated dist-info metadata.

Files changed (204)
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
  202. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  204. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
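
The pattern above (every `polars/*.py` module removed, with only the compiled extension and fresh dist-info metadata remaining) can be checked directly against the wheel archives. Below is a minimal sketch using Python's standard-library `zipfile`; the local wheel filenames are assumptions inferred from the versions in the title, not part of this diff.

    from zipfile import ZipFile

    # Filenames are assumed; adjust to wherever the two wheels were downloaded.
    OLD_WHL = "polars_runtime_compat-1.34.0b3-cp39-abi3-win_amd64.whl"
    NEW_WHL = "polars_runtime_compat-1.34.0b5-cp39-abi3-win_amd64.whl"

    old = set(ZipFile(OLD_WHL).namelist())
    new = set(ZipFile(NEW_WHL).namelist())

    removed = sorted(old - new)
    added = sorted(new - old)
    print(f"{len(removed)} entries removed, {len(added)} entries added")

    # The tell: pure-Python sources vanish while the binary extension stays.
    print([n for n in removed if n.endswith(".py")][:10])
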
polars/functions/lazy.py DELETED
@@ -1,2751 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
- from typing import TYPE_CHECKING, Any, Callable, overload
5
-
6
- import polars._reexport as pl
7
- import polars.functions as F
8
- import polars.selectors as cs
9
- from polars._dependencies import _check_for_numpy
10
- from polars._dependencies import numpy as np
11
- from polars._utils.async_ import _AioDataFrameResult, _GeventDataFrameResult
12
- from polars._utils.deprecation import (
13
- deprecate_renamed_parameter,
14
- deprecate_streaming_parameter,
15
- deprecated,
16
- issue_deprecation_warning,
17
- )
18
- from polars._utils.parse import (
19
- parse_into_expression,
20
- parse_into_list_of_expressions,
21
- )
22
- from polars._utils.unstable import issue_unstable_warning, unstable
23
- from polars._utils.various import extend_bool, qualified_type_name
24
- from polars._utils.wrap import wrap_df, wrap_expr, wrap_s
25
- from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64
26
- from polars.datatypes._parse import parse_into_datatype_expr
27
- from polars.lazyframe.opt_flags import (
28
- DEFAULT_QUERY_OPT_FLAGS,
29
- forward_old_opt_flags,
30
- )
31
- from polars.meta.index_type import get_index_type
32
-
33
- with contextlib.suppress(ImportError): # Module not available when building docs
34
- import polars._plr as plr
35
-
36
- if TYPE_CHECKING:
37
- import sys
38
- from collections.abc import Awaitable, Collection, Iterable, Sequence
39
- from typing import Literal
40
-
41
- from polars import DataFrame, Expr, LazyFrame, Series
42
- from polars._typing import (
43
- CorrelationMethod,
44
- EngineType,
45
- EpochTimeUnit,
46
- IntoExpr,
47
- PolarsDataType,
48
- QuantileMethod,
49
- )
50
- from polars.lazyframe.opt_flags import (
51
- QueryOptFlags,
52
- )
53
-
54
- if sys.version_info >= (3, 13):
55
- from warnings import deprecated
56
- else:
57
- from typing_extensions import deprecated # noqa: TC004
58
-
59
-
60
- def field(name: str | list[str]) -> Expr:
61
- """
62
- Select a field in the current `struct.with_fields` scope.
63
-
64
- name
65
- Name of the field(s) to select.
66
- """
67
- if isinstance(name, str):
68
- name = [name]
69
- return wrap_expr(plr.field(name))
70
-
71
-
72
- def element() -> Expr:
73
- """
74
- Alias for an element being evaluated in an `eval` or `filter` expression.
75
-
76
- Examples
77
- --------
78
- A horizontal rank computation by taking the elements of a list
79
-
80
- >>> df = pl.DataFrame(
81
- ... {
82
- ... "a": [1, 8, 3],
83
- ... "b": [4, 5, 2],
84
- ... }
85
- ... )
86
- >>> df.with_columns(
87
- ... pl.concat_list(["a", "b"]).list.eval(pl.element().rank()).alias("rank")
88
- ... )
89
- shape: (3, 3)
90
- ┌─────┬─────┬────────────┐
91
- │ a ┆ b ┆ rank │
92
- │ --- ┆ --- ┆ --- │
93
- │ i64 ┆ i64 ┆ list[f64] │
94
- ╞═════╪═════╪════════════╡
95
- │ 1 ┆ 4 ┆ [1.0, 2.0] │
96
- │ 8 ┆ 5 ┆ [2.0, 1.0] │
97
- │ 3 ┆ 2 ┆ [2.0, 1.0] │
98
- └─────┴─────┴────────────┘
99
-
100
- A mathematical operation on array elements
101
-
102
- >>> df = pl.DataFrame(
103
- ... {
104
- ... "a": [1, 8, 3],
105
- ... "b": [4, 5, 2],
106
- ... }
107
- ... )
108
- >>> df.with_columns(
109
- ... pl.concat_list(["a", "b"]).list.eval(pl.element() * 2).alias("a_b_doubled")
110
- ... )
111
- shape: (3, 3)
112
- ┌─────┬─────┬─────────────┐
113
- │ a ┆ b ┆ a_b_doubled │
114
- │ --- ┆ --- ┆ --- │
115
- │ i64 ┆ i64 ┆ list[i64] │
116
- ╞═════╪═════╪═════════════╡
117
- │ 1 ┆ 4 ┆ [2, 8] │
118
- │ 8 ┆ 5 ┆ [16, 10] │
119
- │ 3 ┆ 2 ┆ [6, 4] │
120
- └─────┴─────┴─────────────┘
121
-
122
- A filter operation on list elements
123
-
124
- >>> import polars as pl
125
- >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
126
- >>> df.with_columns(
127
- ... evens=pl.concat_list("a", "b").list.filter(pl.element() % 2 == 0)
128
- ... )
129
- shape: (3, 3)
130
- ┌─────┬─────┬───────────┐
131
- │ a ┆ b ┆ evens │
132
- │ --- ┆ --- ┆ --- │
133
- │ i64 ┆ i64 ┆ list[i64] │
134
- ╞═════╪═════╪═══════════╡
135
- │ 1 ┆ 4 ┆ [4] │
136
- │ 8 ┆ 5 ┆ [8] │
137
- │ 3 ┆ 2 ┆ [2] │
138
- └─────┴─────┴───────────┘
139
- """
140
- return F.col("")
141
-
142
-
143
- def count(*columns: str) -> Expr:
144
- """
145
- Return the number of non-null values in the column.
146
-
147
- This function is syntactic sugar for `col(columns).count()`.
148
-
149
- Calling this function without any arguments returns the number of rows in the
150
- context. **This way of using the function is deprecated.** Please use :func:`len`
151
- instead.
152
-
153
- Parameters
154
- ----------
155
- *columns
156
- One or more column names.
157
-
158
- Returns
159
- -------
160
- Expr
161
- Expression of data type :class:`UInt32`.
162
-
163
- See Also
164
- --------
165
- Expr.count
166
-
167
- Examples
168
- --------
169
- >>> df = pl.DataFrame(
170
- ... {
171
- ... "a": [1, 2, None],
172
- ... "b": [3, None, None],
173
- ... "c": ["foo", "bar", "foo"],
174
- ... }
175
- ... )
176
- >>> df.select(pl.count("a"))
177
- shape: (1, 1)
178
- ┌─────┐
179
- │ a │
180
- │ --- │
181
- │ u32 │
182
- ╞═════╡
183
- │ 2 │
184
- └─────┘
185
-
186
- Return the number of non-null values in multiple columns.
187
-
188
- >>> df.select(pl.count("b", "c"))
189
- shape: (1, 2)
190
- ┌─────┬─────┐
191
- │ b ┆ c │
192
- │ --- ┆ --- │
193
- │ u32 ┆ u32 │
194
- ╞═════╪═════╡
195
- │ 1 ┆ 3 │
196
- └─────┴─────┘
197
-
198
- Return the number of rows in a context. **This way of using the function is
199
- deprecated.** Please use :func:`len` instead.
200
-
201
- >>> df.select(pl.count()) # doctest: +SKIP
202
- shape: (1, 1)
203
- ┌───────┐
204
- │ count │
205
- │ --- │
206
- │ u32 │
207
- ╞═══════╡
208
- │ 3 │
209
- └───────┘
210
- """
211
- if not columns:
212
- issue_deprecation_warning(
213
- "`pl.count()` is deprecated. Please use `pl.len()` instead.",
214
- version="0.20.5",
215
- )
216
- return F.len().alias("count")
217
- return F.col(*columns).count()
218
-
219
-
220
- def cum_count(*columns: str, reverse: bool = False) -> Expr:
221
- """
222
- Return the cumulative count of the non-null values in the column.
223
-
224
- This function is syntactic sugar for `col(columns).cum_count()`.
225
-
226
- Parameters
227
- ----------
228
- *columns
229
- Name(s) of the columns to use.
230
- reverse
231
- Reverse the operation.
232
-
233
- Examples
234
- --------
235
- >>> df = pl.DataFrame({"a": [1, 2, None], "b": [3, None, None]})
236
- >>> df.with_columns(
237
- ... ca=pl.cum_count("a"),
238
- ... cb=pl.cum_count("b"),
239
- ... )
240
- shape: (3, 4)
241
- ┌──────┬──────┬─────┬─────┐
242
- │ a ┆ b ┆ ca ┆ cb │
243
- │ --- ┆ --- ┆ --- ┆ --- │
244
- │ i64 ┆ i64 ┆ u32 ┆ u32 │
245
- ╞══════╪══════╪═════╪═════╡
246
- │ 1 ┆ 3 ┆ 1 ┆ 1 │
247
- │ 2 ┆ null ┆ 2 ┆ 1 │
248
- │ null ┆ null ┆ 2 ┆ 1 │
249
- └──────┴──────┴─────┴─────┘
250
- """
251
- return F.col(*columns).cum_count(reverse=reverse)
252
-
253
-
254
- def implode(*columns: str) -> Expr:
255
- """
256
- Aggregate all column values into a list.
257
-
258
- This function is syntactic sugar for `pl.col(name).implode()`.
259
-
260
- Parameters
261
- ----------
262
- *columns
263
- One or more column names.
264
-
265
- Examples
266
- --------
267
- >>> df = pl.DataFrame(
268
- ... {
269
- ... "a": [1, 2, 3],
270
- ... "b": [9, 8, 7],
271
- ... "c": ["foo", "bar", "foo"],
272
- ... }
273
- ... )
274
- >>> df.select(pl.implode("a"))
275
- shape: (1, 1)
276
- ┌───────────┐
277
- │ a │
278
- │ --- │
279
- │ list[i64] │
280
- ╞═══════════╡
281
- │ [1, 2, 3] │
282
- └───────────┘
283
- >>> df.select(pl.implode("b", "c"))
284
- shape: (1, 2)
285
- ┌───────────┬───────────────────────┐
286
- │ b ┆ c │
287
- │ --- ┆ --- │
288
- │ list[i64] ┆ list[str] │
289
- ╞═══════════╪═══════════════════════╡
290
- │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
291
- └───────────┴───────────────────────┘
292
-
293
- """
294
- return F.col(*columns).implode()
295
-
296
-
297
- def std(column: str, ddof: int = 1) -> Expr:
298
- """
299
- Get the standard deviation.
300
-
301
- This function is syntactic sugar for `pl.col(column).std(ddof)`.
302
-
303
- Parameters
304
- ----------
305
- column
306
- Column name.
307
- ddof
308
- “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
309
- where N represents the number of elements.
310
- By default ddof is 1.
311
-
312
- Examples
313
- --------
314
- >>> df = pl.DataFrame(
315
- ... {
316
- ... "a": [1, 8, 3],
317
- ... "b": [4, 5, 2],
318
- ... "c": ["foo", "bar", "foo"],
319
- ... }
320
- ... )
321
- >>> df.select(pl.std("a"))
322
- shape: (1, 1)
323
- ┌──────────┐
324
- │ a │
325
- │ --- │
326
- │ f64 │
327
- ╞══════════╡
328
- │ 3.605551 │
329
- └──────────┘
330
- >>> df["a"].std()
331
- 3.605551275463989
332
- """
333
- return F.col(column).std(ddof)
334
-
335
-
336
- def var(column: str, ddof: int = 1) -> Expr:
337
- """
338
- Get the variance.
339
-
340
- This function is syntactic sugar for `pl.col(column).var(ddof)`.
341
-
342
- Parameters
343
- ----------
344
- column
345
- Column name.
346
- ddof
347
- “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
348
- where N represents the number of elements.
349
- By default ddof is 1.
350
-
351
- Examples
352
- --------
353
- >>> df = pl.DataFrame(
354
- ... {
355
- ... "a": [1, 8, 3],
356
- ... "b": [4, 5, 2],
357
- ... "c": ["foo", "bar", "foo"],
358
- ... },
359
- ... )
360
- >>> df.select(pl.var("a"))
361
- shape: (1, 1)
362
- ┌──────┐
363
- │ a │
364
- │ --- │
365
- │ f64 │
366
- ╞══════╡
367
- │ 13.0 │
368
- └──────┘
369
- >>> df["a"].var()
370
- 13.0
371
- """
372
- return F.col(column).var(ddof)
373
-
374
-
375
- def mean(*columns: str) -> Expr:
376
- """
377
- Get the mean value.
378
-
379
- This function is syntactic sugar for `pl.col(columns).mean()`.
380
-
381
- Parameters
382
- ----------
383
- *columns
384
- One or more column names.
385
-
386
- See Also
387
- --------
388
- mean_horizontal
389
-
390
- Examples
391
- --------
392
- >>> df = pl.DataFrame(
393
- ... {
394
- ... "a": [1, 8, 3],
395
- ... "b": [4, 5, 2],
396
- ... "c": ["foo", "bar", "foo"],
397
- ... }
398
- ... )
399
- >>> df.select(pl.mean("a"))
400
- shape: (1, 1)
401
- ┌─────┐
402
- │ a │
403
- │ --- │
404
- │ f64 │
405
- ╞═════╡
406
- │ 4.0 │
407
- └─────┘
408
- >>> df.select(pl.mean("a", "b"))
409
- shape: (1, 2)
410
- ┌─────┬──────────┐
411
- │ a ┆ b │
412
- │ --- ┆ --- │
413
- │ f64 ┆ f64 │
414
- ╞═════╪══════════╡
415
- │ 4.0 ┆ 3.666667 │
416
- └─────┴──────────┘
417
-
418
- """
419
- return F.col(*columns).mean()
420
-
421
-
422
- def median(*columns: str) -> Expr:
423
- """
424
- Get the median value.
425
-
426
- This function is syntactic sugar for `pl.col(columns).median()`.
427
-
428
- Parameters
429
- ----------
430
- columns
431
- One or more column names.
432
-
433
- Examples
434
- --------
435
- >>> df = pl.DataFrame(
436
- ... {
437
- ... "a": [1, 8, 3],
438
- ... "b": [4, 5, 2],
439
- ... "c": ["foo", "bar", "foo"],
440
- ... }
441
- ... )
442
- >>> df.select(pl.median("a"))
443
- shape: (1, 1)
444
- ┌─────┐
445
- │ a │
446
- │ --- │
447
- │ f64 │
448
- ╞═════╡
449
- │ 3.0 │
450
- └─────┘
451
- >>> df.select(pl.median("a", "b"))
452
- shape: (1, 2)
453
- ┌─────┬─────┐
454
- │ a ┆ b │
455
- │ --- ┆ --- │
456
- │ f64 ┆ f64 │
457
- ╞═════╪═════╡
458
- │ 3.0 ┆ 4.0 │
459
- └─────┴─────┘
460
-
461
- """
462
- return F.col(*columns).median()
463
-
464
-
465
- def n_unique(*columns: str) -> Expr:
466
- """
467
- Count unique values.
468
-
469
- This function is syntactic sugar for `pl.col(columns).n_unique()`.
470
-
471
- Parameters
472
- ----------
473
- columns
474
- One or more column names.
475
-
476
- Examples
477
- --------
478
- >>> df = pl.DataFrame(
479
- ... {
480
- ... "a": [1, 8, 1],
481
- ... "b": [4, 5, 2],
482
- ... "c": ["foo", "bar", "foo"],
483
- ... }
484
- ... )
485
- >>> df.select(pl.n_unique("a"))
486
- shape: (1, 1)
487
- ┌─────┐
488
- │ a │
489
- │ --- │
490
- │ u32 │
491
- ╞═════╡
492
- │ 2 │
493
- └─────┘
494
- >>> df.select(pl.n_unique("b", "c"))
495
- shape: (1, 2)
496
- ┌─────┬─────┐
497
- │ b ┆ c │
498
- │ --- ┆ --- │
499
- │ u32 ┆ u32 │
500
- ╞═════╪═════╡
501
- │ 3 ┆ 2 │
502
- └─────┴─────┘
503
-
504
- """
505
- return F.col(*columns).n_unique()
506
-
507
-
508
- def approx_n_unique(*columns: str) -> Expr:
509
- """
510
- Approximate count of unique values.
511
-
512
- This function is syntactic sugar for `pl.col(columns).approx_n_unique()`, and
513
- uses the HyperLogLog++ algorithm for cardinality estimation.
514
-
515
- Parameters
516
- ----------
517
- columns
518
- One or more column names.
519
-
520
- Examples
521
- --------
522
- >>> df = pl.DataFrame(
523
- ... {
524
- ... "a": [1, 8, 1],
525
- ... "b": [4, 5, 2],
526
- ... "c": ["foo", "bar", "foo"],
527
- ... }
528
- ... )
529
- >>> df.select(pl.approx_n_unique("a"))
530
- shape: (1, 1)
531
- ┌─────┐
532
- │ a │
533
- │ --- │
534
- │ u32 │
535
- ╞═════╡
536
- │ 2 │
537
- └─────┘
538
- >>> df.select(pl.approx_n_unique("b", "c"))
539
- shape: (1, 2)
540
- ┌─────┬─────┐
541
- │ b ┆ c │
542
- │ --- ┆ --- │
543
- │ u32 ┆ u32 │
544
- ╞═════╪═════╡
545
- │ 3 ┆ 2 │
546
- └─────┴─────┘
547
-
548
- """
549
- return F.col(*columns).approx_n_unique()
550
-
551
-
552
- def first(*columns: str) -> Expr:
553
- """
554
- Get the first column or value.
555
-
556
- This function has different behavior depending on the presence of `columns`
557
- values. If none given (the default), returns an expression that takes the first
558
- column of the context; otherwise, takes the first value of the given column(s).
559
-
560
- Parameters
561
- ----------
562
- *columns
563
- One or more column names.
564
-
565
- Examples
566
- --------
567
- >>> df = pl.DataFrame(
568
- ... {
569
- ... "a": [1, 8, 3],
570
- ... "b": [4, 5, 2],
571
- ... "c": ["foo", "bar", "baz"],
572
- ... }
573
- ... )
574
-
575
- Return the first column:
576
-
577
- >>> df.select(pl.first())
578
- shape: (3, 1)
579
- ┌─────┐
580
- │ a │
581
- │ --- │
582
- │ i64 │
583
- ╞═════╡
584
- │ 1 │
585
- │ 8 │
586
- │ 3 │
587
- └─────┘
588
-
589
- Return the first value for the given column(s):
590
-
591
- >>> df.select(pl.first("b"))
592
- shape: (1, 1)
593
- ┌─────┐
594
- │ b │
595
- │ --- │
596
- │ i64 │
597
- ╞═════╡
598
- │ 4 │
599
- └─────┘
600
- >>> df.select(pl.first("a", "c"))
601
- shape: (1, 2)
602
- ┌─────┬─────┐
603
- │ a ┆ c │
604
- │ --- ┆ --- │
605
- │ i64 ┆ str │
606
- ╞═════╪═════╡
607
- │ 1 ┆ foo │
608
- └─────┴─────┘
609
-
610
- """
611
- if not columns:
612
- return cs.first().as_expr()
613
-
614
- return F.col(*columns).first()
615
-
616
-
617
- def last(*columns: str) -> Expr:
618
- """
619
- Get the last column or value.
620
-
621
- This function has different behavior depending on the presence of `columns`
622
- values. If none given (the default), returns an expression that takes the last
623
- column of the context; otherwise, takes the last value of the given column(s).
624
-
625
- Parameters
626
- ----------
627
- *columns
628
- One or more column names.
629
-
630
- Examples
631
- --------
632
- >>> df = pl.DataFrame(
633
- ... {
634
- ... "a": [1, 8, 3],
635
- ... "b": [4, 5, 2],
636
- ... "c": ["foo", "bar", "baz"],
637
- ... }
638
- ... )
639
-
640
- Return the last column:
641
-
642
- >>> df.select(pl.last())
643
- shape: (3, 1)
644
- ┌─────┐
645
- │ c │
646
- │ --- │
647
- │ str │
648
- ╞═════╡
649
- │ foo │
650
- │ bar │
651
- │ baz │
652
- └─────┘
653
-
654
- Return the last value for the given column(s):
655
-
656
- >>> df.select(pl.last("a"))
657
- shape: (1, 1)
658
- ┌─────┐
659
- │ a │
660
- │ --- │
661
- │ i64 │
662
- ╞═════╡
663
- │ 3 │
664
- └─────┘
665
- >>> df.select(pl.last("b", "c"))
666
- shape: (1, 2)
667
- ┌─────┬─────┐
668
- │ b ┆ c │
669
- │ --- ┆ --- │
670
- │ i64 ┆ str │
671
- ╞═════╪═════╡
672
- │ 2 ┆ baz │
673
- └─────┴─────┘
674
-
675
- """
676
- if not columns:
677
- return cs.last().as_expr()
678
-
679
- return F.col(*columns).last()
680
-
681
-
682
- def nth(*indices: int | Sequence[int], strict: bool = True) -> Expr:
683
- """
684
- Get the nth column(s) of the context.
685
-
686
- Parameters
687
- ----------
688
- indices
689
- One or more indices representing the columns to retrieve.
690
-
691
- Examples
692
- --------
693
- >>> df = pl.DataFrame(
694
- ... {
695
- ... "a": [1, 8, 3],
696
- ... "b": [4, 5, 2],
697
- ... "c": ["foo", "bar", "baz"],
698
- ... }
699
- ... )
700
- >>> df.select(pl.nth(1))
701
- shape: (3, 1)
702
- ┌─────┐
703
- │ b │
704
- │ --- │
705
- │ i64 │
706
- ╞═════╡
707
- │ 4 │
708
- │ 5 │
709
- │ 2 │
710
- └─────┘
711
- >>> df.select(pl.nth(2, 0))
712
- shape: (3, 2)
713
- ┌─────┬─────┐
714
- │ c ┆ a │
715
- │ --- ┆ --- │
716
- │ str ┆ i64 │
717
- ╞═════╪═════╡
718
- │ foo ┆ 1 │
719
- │ bar ┆ 8 │
720
- │ baz ┆ 3 │
721
- └─────┴─────┘
722
- """
723
- return cs.by_index(*indices, require_all=strict).as_expr()
724
-
725
-
726
- def head(column: str, n: int = 10) -> Expr:
727
- """
728
- Get the first `n` rows.
729
-
730
- This function is syntactic sugar for `pl.col(column).head(n)`.
731
-
732
- Parameters
733
- ----------
734
- column
735
- Column name.
736
- n
737
- Number of rows to return.
738
-
739
- Examples
740
- --------
741
- >>> df = pl.DataFrame(
742
- ... {
743
- ... "a": [1, 8, 3],
744
- ... "b": [4, 5, 2],
745
- ... "c": ["foo", "bar", "foo"],
746
- ... }
747
- ... )
748
- >>> df.select(pl.head("a"))
749
- shape: (3, 1)
750
- ┌─────┐
751
- │ a │
752
- │ --- │
753
- │ i64 │
754
- ╞═════╡
755
- │ 1 │
756
- │ 8 │
757
- │ 3 │
758
- └─────┘
759
- >>> df.select(pl.head("a", 2))
760
- shape: (2, 1)
761
- ┌─────┐
762
- │ a │
763
- │ --- │
764
- │ i64 │
765
- ╞═════╡
766
- │ 1 │
767
- │ 8 │
768
- └─────┘
769
- """
770
- return F.col(column).head(n)
771
-
772
-
773
- def tail(column: str, n: int = 10) -> Expr:
774
- """
775
- Get the last `n` rows.
776
-
777
- This function is syntactic sugar for `pl.col(column).tail(n)`.
778
-
779
- Parameters
780
- ----------
781
- column
782
- Column name.
783
- n
784
- Number of rows to return.
785
-
786
- Examples
787
- --------
788
- >>> df = pl.DataFrame(
789
- ... {
790
- ... "a": [1, 8, 3],
791
- ... "b": [4, 5, 2],
792
- ... "c": ["foo", "bar", "foo"],
793
- ... }
794
- ... )
795
- >>> df.select(pl.tail("a"))
796
- shape: (3, 1)
797
- ┌─────┐
798
- │ a │
799
- │ --- │
800
- │ i64 │
801
- ╞═════╡
802
- │ 1 │
803
- │ 8 │
804
- │ 3 │
805
- └─────┘
806
- >>> df.select(pl.tail("a", 2))
807
- shape: (2, 1)
808
- ┌─────┐
809
- │ a │
810
- │ --- │
811
- │ i64 │
812
- ╞═════╡
813
- │ 8 │
814
- │ 3 │
815
- └─────┘
816
- """
817
- return F.col(column).tail(n)
818
-
819
-
820
- @overload
821
- def corr(
822
- a: IntoExpr,
823
- b: IntoExpr,
824
- *,
825
- method: CorrelationMethod = ...,
826
- ddof: int | None = ...,
827
- propagate_nans: bool = ...,
828
- eager: Literal[False] = ...,
829
- ) -> Expr: ...
830
-
831
-
832
- @overload
833
- def corr(
834
- a: IntoExpr,
835
- b: IntoExpr,
836
- *,
837
- method: CorrelationMethod = ...,
838
- ddof: int | None = ...,
839
- propagate_nans: bool = ...,
840
- eager: Literal[True],
841
- ) -> Series: ...
842
-
843
-
844
- def corr(
845
- a: IntoExpr,
846
- b: IntoExpr,
847
- *,
848
- method: CorrelationMethod = "pearson",
849
- ddof: int | None = None,
850
- propagate_nans: bool = False,
851
- eager: bool = False,
852
- ) -> Expr | Series:
853
- """
854
- Compute the Pearson's or Spearman rank correlation between two columns.
855
-
856
- Parameters
857
- ----------
858
- a
859
- Column name or Expression.
860
- b
861
- Column name or Expression.
862
- ddof
863
- Has no effect, do not use.
864
-
865
- .. deprecated:: 1.17.0
866
-
867
- method : {'pearson', 'spearman'}
868
- Correlation method.
869
- propagate_nans
870
- If `True` any `NaN` encountered will lead to `NaN` in the output.
871
- Defaults to `False` where `NaN` are regarded as larger than any finite number
872
- and thus lead to the highest rank.
873
- eager
874
- Evaluate immediately and return a `Series`; this requires that at least one
875
- of the given arguments is a `Series`. If set to `False` (default), return
876
- an expression instead.
877
-
878
- Examples
879
- --------
880
- Pearson's correlation:
881
-
882
- >>> df = pl.DataFrame(
883
- ... {
884
- ... "a": [1, 8, 3],
885
- ... "b": [4, 5, 2],
886
- ... "c": ["foo", "bar", "foo"],
887
- ... }
888
- ... )
889
- >>> df.select(pl.corr("a", "b"))
890
- shape: (1, 1)
891
- ┌──────────┐
892
- │ a │
893
- │ --- │
894
- │ f64 │
895
- ╞══════════╡
896
- │ 0.544705 │
897
- └──────────┘
898
-
899
- Spearman rank correlation:
900
-
901
- >>> df.select(pl.corr("a", "b", method="spearman"))
902
- shape: (1, 1)
903
- ┌─────┐
904
- │ a │
905
- │ --- │
906
- │ f64 │
907
- ╞═════╡
908
- │ 0.5 │
909
- └─────┘
910
-
911
- Eager evaluation:
912
-
913
- >>> s1 = pl.Series("a", [1, 8, 3])
914
- >>> s2 = pl.Series("b", [4, 5, 2])
915
- >>> pl.corr(s1, s2, eager=True)
916
- shape: (1,)
917
- Series: 'a' [f64]
918
- [
919
- 0.544705
920
- ]
921
- >>> pl.corr(s1, s2, method="spearman", eager=True)
922
- shape: (1,)
923
- Series: 'a' [f64]
924
- [
925
- 0.5
926
- ]
927
- """
928
- if ddof is not None:
929
- issue_deprecation_warning(
930
- "the `ddof` parameter has no effect. Do not use it.",
931
- version="1.17.0",
932
- )
933
-
934
- if eager:
935
- if not (isinstance(a, pl.Series) or isinstance(b, pl.Series)):
936
- msg = "expected at least one Series in 'corr' inputs if 'eager=True'"
937
- raise ValueError(msg)
938
-
939
- frame = pl.DataFrame([e for e in (a, b) if isinstance(e, pl.Series)])
940
- exprs = ((e.name if isinstance(e, pl.Series) else e) for e in (a, b))
941
- return frame.select(
942
- corr(*exprs, eager=False, method=method, propagate_nans=propagate_nans)
943
- ).to_series()
944
- else:
945
- a_pyexpr = parse_into_expression(a)
946
- b_pyexpr = parse_into_expression(b)
947
-
948
- if method == "pearson":
949
- return wrap_expr(plr.pearson_corr(a_pyexpr, b_pyexpr))
950
- elif method == "spearman":
951
- return wrap_expr(plr.spearman_rank_corr(a_pyexpr, b_pyexpr, propagate_nans))
952
- else:
953
- msg = f"method must be one of {{'pearson', 'spearman'}}, got {method!r}"
954
- raise ValueError(msg)
955
-
956
-
957
- @overload
958
- def cov(
959
- a: IntoExpr,
960
- b: IntoExpr,
961
- *,
962
- ddof: int = ...,
963
- eager: Literal[False] = ...,
964
- ) -> Expr: ...
965
-
966
-
967
- @overload
968
- def cov(
969
- a: IntoExpr,
970
- b: IntoExpr,
971
- *,
972
- ddof: int = ...,
973
- eager: Literal[True],
974
- ) -> Series: ...
975
-
976
-
977
- def cov(
978
- a: IntoExpr,
979
- b: IntoExpr,
980
- *,
981
- ddof: int = 1,
982
- eager: bool = False,
983
- ) -> Expr | Series:
984
- """
985
- Compute the covariance between two columns/ expressions.
986
-
987
- Parameters
988
- ----------
989
- a
990
- Column name or Expression.
991
- b
992
- Column name or Expression.
993
- ddof
994
- "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
995
- where N represents the number of elements.
996
- By default ddof is 1.
997
- eager
998
- Evaluate immediately and return a `Series`; this requires that at least one
999
- of the given arguments is a `Series`. If set to `False` (default), return
1000
- an expression instead.
1001
-
1002
- Examples
1003
- --------
1004
- >>> df = pl.DataFrame(
1005
- ... {
1006
- ... "a": [1, 8, 3],
1007
- ... "b": [4, 5, 2],
1008
- ... "c": ["foo", "bar", "foo"],
1009
- ... },
1010
- ... )
1011
-
1012
- >>> df.select(
1013
- ... x=pl.cov("a", "b"),
1014
- ... y=pl.cov("a", "b", ddof=2),
1015
- ... )
1016
- shape: (1, 2)
1017
- ┌─────┬─────┐
1018
- │ x ┆ y │
1019
- │ --- ┆ --- │
1020
- │ f64 ┆ f64 │
1021
- ╞═════╪═════╡
1022
- │ 3.0 ┆ 6.0 │
1023
- └─────┴─────┘
1024
-
1025
- Eager evaluation:
1026
-
1027
- >>> s1 = pl.Series("a", [1, 8, 3])
1028
- >>> s2 = pl.Series("b", [4, 5, 2])
1029
- >>> pl.cov(s1, s2, eager=True)
1030
- shape: (1,)
1031
- Series: 'a' [f64]
1032
- [
1033
- 3.0
1034
- ]
1035
- """
1036
- if eager:
1037
- if not (isinstance(a, pl.Series) or isinstance(b, pl.Series)):
1038
- msg = "expected at least one Series in 'cov' inputs if 'eager=True'"
1039
- raise ValueError(msg)
1040
-
1041
- frame = pl.DataFrame([e for e in (a, b) if isinstance(e, pl.Series)])
1042
- exprs = ((e.name if isinstance(e, pl.Series) else e) for e in (a, b))
1043
- return frame.select(cov(*exprs, eager=False, ddof=ddof)).to_series()
1044
- else:
1045
- a_pyexpr = parse_into_expression(a)
1046
- b_pyexpr = parse_into_expression(b)
1047
- return wrap_expr(plr.cov(a_pyexpr, b_pyexpr, ddof))
1048
-
1049
-
1050
- class _map_batches_wrapper:
1051
- def __init__(
1052
- self,
1053
- function: Callable[[Sequence[Series]], Series | Any],
1054
- *,
1055
- returns_scalar: bool,
1056
- ) -> None:
1057
- self.function = function
1058
- self.returns_scalar = returns_scalar
1059
-
1060
- def __call__(
1061
- self, sl: list[plr.PySeries], *args: Any, **kwargs: Any
1062
- ) -> plr.PySeries:
1063
- return_dtype = kwargs["return_dtype"]
1064
- slp = [wrap_s(s) for s in sl]
1065
-
1066
- # ufunc and numba don't expect return_dtype
1067
- try:
1068
- rv = self.function(slp, *args, **kwargs)
1069
- except TypeError as e:
1070
- if "unexpected keyword argument 'return_dtype'" in e.args[0]:
1071
- kwargs.pop("return_dtype")
1072
- rv = self.function(slp, *args, **kwargs)
1073
- else:
1074
- raise
1075
-
1076
- if _check_for_numpy(rv) and isinstance(rv, np.ndarray):
1077
- rv = pl.Series(rv, dtype=return_dtype)
1078
-
1079
- if isinstance(rv, pl.Series):
1080
- return rv._s
1081
- elif self.returns_scalar:
1082
- return pl.Series([rv], dtype=return_dtype)._s
1083
- else:
1084
- msg = f"`map` with `returns_scalar=False` must return a Series; found {qualified_type_name(rv)!r}.\n\nIf `returns_scalar` is set to `True`, a returned value can be a scalar value."
1085
- raise TypeError(msg)
1086
-
1087
-
1088
- def map_batches(
1089
- exprs: Sequence[str | Expr],
1090
- function: Callable[[Sequence[Series]], Series | Any],
1091
- return_dtype: PolarsDataType | pl.DataTypeExpr | None = None,
1092
- *,
1093
- is_elementwise: bool = False,
1094
- returns_scalar: bool = False,
1095
- ) -> Expr:
1096
- """
1097
- Map a custom function over multiple columns/expressions.
1098
-
1099
- Produces a single Series result.
1100
-
1101
- .. warning::
1102
- This method is much slower than the native expressions API.
1103
- Only use it if you cannot implement your logic otherwise.
1104
-
1105
- Parameters
1106
- ----------
1107
- exprs
1108
- Expression(s) representing the input Series to the function.
1109
- function
1110
- Function to apply over the input.
1111
- return_dtype
1112
- Datatype of the output Series.
1113
-
1114
- It is recommended to set this whenever possible. If this is `None`, it tries
1115
- to infer the datatype by calling the function with dummy data and looking at
1116
- the output.
1117
- is_elementwise
1118
- Set to true if the operations is elementwise for better performance
1119
- and optimization.
1120
-
1121
- An elementwise operations has unit or equal length for all inputs
1122
- and can be ran sequentially on slices without results being affected.
1123
- returns_scalar
1124
- If the function returns a scalar, by default it will be wrapped in
1125
- a list in the output, since the assumption is that the function
1126
- always returns something Series-like. If you want to keep the
1127
- result as a scalar, set this argument to True.
1128
-
1129
- Notes
1130
- -----
1131
- A UDF passed to `map_batches` must be pure, meaning that it cannot modify
1132
- or depend on state other than its arguments. We may call the function
1133
- with arbitrary input data.
1134
-
1135
- Returns
1136
- -------
1137
- Expr
1138
- Expression with the data type given by `return_dtype`.
1139
-
1140
- Examples
1141
- --------
1142
- >>> def test_func(a, b, c):
1143
- ... return a + b + c
1144
- >>> df = pl.DataFrame(
1145
- ... {
1146
- ... "a": [1, 2, 3, 4],
1147
- ... "b": [4, 5, 6, 7],
1148
- ... }
1149
- ... )
1150
- >>>
1151
- >>> df.with_columns(
1152
- ... (
1153
- ... pl.struct(["a", "b"]).map_batches(
1154
- ... lambda x: test_func(x.struct.field("a"), x.struct.field("b"), 1)
1155
- ... )
1156
- ... ).alias("a+b+c")
1157
- ... )
1158
- shape: (4, 3)
1159
- ┌─────┬─────┬───────┐
1160
- │ a ┆ b ┆ a+b+c │
1161
- │ --- ┆ --- ┆ --- │
1162
- │ i64 ┆ i64 ┆ i64 │
1163
- ╞═════╪═════╪═══════╡
1164
- │ 1 ┆ 4 ┆ 6 │
1165
- │ 2 ┆ 5 ┆ 8 │
1166
- │ 3 ┆ 6 ┆ 10 │
1167
- │ 4 ┆ 7 ┆ 12 │
1168
- └─────┴─────┴───────┘
1169
- """
1170
- pyexprs = parse_into_list_of_expressions(exprs)
1171
-
1172
- return_dtype_expr = (
1173
- parse_into_datatype_expr(return_dtype)._pydatatype_expr
1174
- if return_dtype is not None
1175
- else None
1176
- )
1177
-
1178
- return wrap_expr(
1179
- plr.map_expr(
1180
- pyexprs,
1181
- _map_batches_wrapper(function, returns_scalar=returns_scalar),
1182
- return_dtype_expr,
1183
- is_elementwise=is_elementwise,
1184
- returns_scalar=returns_scalar,
1185
- )
1186
- )
1187
-
1188
-
1189
- def map_groups(
1190
- exprs: Sequence[str | Expr],
1191
- function: Callable[[Sequence[Series]], Series | Any],
1192
- return_dtype: PolarsDataType | pl.DataTypeExpr | None = None,
1193
- *,
1194
- is_elementwise: bool = False,
1195
- returns_scalar: bool = False,
1196
- ) -> Expr:
1197
- """
1198
- Apply a custom/user-defined function (UDF) in a GroupBy context.
1199
-
1200
- .. warning::
1201
- This method is much slower than the native expressions API.
1202
- Only use it if you cannot implement your logic otherwise.
1203
-
1204
- Parameters
1205
- ----------
1206
- exprs
1207
- Expression(s) representing the input Series to the function.
1208
- function
1209
- Function to apply over the input; should be of type Callable[[Series], Series].
1210
- return_dtype
1211
- Datatype of the output Series.
1212
-
1213
- It is recommended to set this whenever possible. If this is `None`, it tries
1214
- to infer the datatype by calling the function with dummy data and looking at
1215
- the output.
1216
- is_elementwise
1217
- Set to true if the operations is elementwise for better performance
1218
- and optimization.
1219
-
1220
- An elementwise operations has unit or equal length for all inputs
1221
- and can be ran sequentially on slices without results being affected.
1222
- returns_scalar
1223
- If the function returns a single scalar as output.
1224
-
1225
- Notes
1226
- -----
1227
- A UDF passed to `map_batches` must be pure, meaning that it cannot modify
1228
- or depend on state other than its arguments. Polars may call the function
1229
- with arbitrary input data.
1230
-
1231
- Returns
1232
- -------
1233
- Expr
1234
- Expression with the data type given by `return_dtype`.
1235
-
1236
- Examples
1237
- --------
1238
- >>> df = pl.DataFrame(
1239
- ... {
1240
- ... "group": [1, 1, 2],
1241
- ... "a": [1, 3, 3],
1242
- ... "b": [5, 6, 7],
1243
- ... }
1244
- ... )
1245
- >>> df
1246
- shape: (3, 3)
1247
- ┌───────┬─────┬─────┐
1248
- │ group ┆ a ┆ b │
1249
- │ --- ┆ --- ┆ --- │
1250
- │ i64 ┆ i64 ┆ i64 │
1251
- ╞═══════╪═════╪═════╡
1252
- │ 1 ┆ 1 ┆ 5 │
1253
- │ 1 ┆ 3 ┆ 6 │
1254
- │ 2 ┆ 3 ┆ 7 │
1255
- └───────┴─────┴─────┘
1256
- >>> (
1257
- ... df.group_by("group").agg(
1258
- ... pl.map_groups(
1259
- ... exprs=["a", "b"],
1260
- ... function=lambda list_of_series: list_of_series[0]
1261
- ... / list_of_series[0].sum()
1262
- ... + list_of_series[1],
1263
- ... return_dtype=pl.Float64,
1264
- ... ).alias("my_custom_aggregation")
1265
- ... )
1266
- ... ).sort("group")
1267
- shape: (2, 2)
1268
- ┌───────┬───────────────────────┐
1269
- │ group ┆ my_custom_aggregation │
1270
- │ --- ┆ --- │
1271
- │ i64 ┆ list[f64] │
1272
- ╞═══════╪═══════════════════════╡
1273
- │ 1 ┆ [5.25, 6.75] │
1274
- │ 2 ┆ [8.0] │
1275
- └───────┴───────────────────────┘
1276
-
1277
- The output for group `1` can be understood as follows:
1278
-
1279
- - group `1` contains Series `'a': [1, 3]` and `'b': [5, 6]`
1280
- - applying the function to those lists of Series, one gets the output
1281
- `[1 / 4 + 5, 3 / 4 + 6]`, i.e. `[5.25, 6.75]`
1282
- """
1283
- return map_batches(
1284
- exprs,
1285
- function,
1286
- return_dtype,
1287
- is_elementwise=is_elementwise,
1288
- returns_scalar=returns_scalar,
1289
- )
1290
-
1291
-
1292
- def _row_encode(
1293
- exprs: pl.Selector | pl.Expr | Sequence[str | pl.Expr],
1294
- *,
1295
- unordered: bool = False,
1296
- descending: list[bool] | None = None,
1297
- nulls_last: list[bool] | None = None,
1298
- ) -> Expr:
1299
- if isinstance(exprs, pl.Selector):
1300
- exprs = [exprs.as_expr()]
1301
- elif isinstance(exprs, pl.Expr):
1302
- exprs = [exprs]
1303
-
1304
- pyexprs = parse_into_list_of_expressions(exprs)
1305
-
1306
- if unordered:
1307
- assert descending is None
1308
- assert nulls_last is None
1309
-
1310
- result = plr.PyExpr.row_encode_unordered(pyexprs)
1311
- else:
1312
- result = plr.PyExpr.row_encode_ordered(pyexprs, descending, nulls_last)
1313
-
1314
- return wrap_expr(result)
1315
-
1316
-
1317
- def _wrap_acc_lamba(
1318
- function: Callable[[Series, Series], Series],
1319
- ) -> Callable[[tuple[plr.PySeries, plr.PySeries]], plr.PySeries]:
1320
- def wrapper(t: tuple[plr.PySeries, plr.PySeries]) -> plr.PySeries:
1321
- a, b = t
1322
- return function(wrap_s(a), wrap_s(b))._s
1323
-
1324
- return wrapper
1325
-
1326
-
1327
- def fold(
1328
- acc: IntoExpr,
1329
- function: Callable[[Series, Series], Series],
1330
- exprs: Sequence[Expr | str] | Expr,
1331
- *,
1332
- returns_scalar: bool = False,
1333
- return_dtype: pl.DataTypeExpr | PolarsDataType | None = None,
1334
- ) -> Expr:
1335
- """
1336
- Accumulate over multiple columns horizontally/ row wise with a left fold.
1337
-
1338
- Parameters
1339
- ----------
1340
- acc
1341
- Accumulator Expression. This is the value that will be initialized when the fold
1342
- starts. For a sum this could for instance be lit(0).
1343
- function
1344
- Function to apply over the accumulator and the value.
1345
- Fn(acc, value) -> new_value
1346
- exprs
1347
- Expressions to aggregate over. May also be a wildcard expression.
1348
- returns_scalar
1349
- Whether or not `function` applied returns a scalar. This must be set correctly
1350
- by the user.
1351
- return_dtype
1352
- Output datatype.
1353
- If not set, the dtype will be inferred based on the dtype
1354
- of the accumulator.
1355
-
1356
- Notes
1357
- -----
1358
- If you simply want the first encountered expression as accumulator,
1359
- consider using `reduce`.
1360
-
1361
- Examples
1362
- --------
1363
- >>> df = pl.DataFrame(
1364
- ... {
1365
- ... "a": [1, 2, 3],
1366
- ... "b": [3, 4, 5],
1367
- ... "c": [5, 6, 7],
1368
- ... }
1369
- ... )
1370
- >>> df
1371
- shape: (3, 3)
1372
- ┌─────┬─────┬─────┐
1373
- │ a ┆ b ┆ c │
1374
- │ --- ┆ --- ┆ --- │
1375
- │ i64 ┆ i64 ┆ i64 │
1376
- ╞═════╪═════╪═════╡
1377
- │ 1 ┆ 3 ┆ 5 │
1378
- │ 2 ┆ 4 ┆ 6 │
1379
- │ 3 ┆ 5 ┆ 7 │
1380
- └─────┴─────┴─────┘
1381
-
1382
- Horizontally sum over all columns and add 1.
1383
-
1384
- >>> df.select(
1385
- ... pl.fold(
1386
- ... acc=pl.lit(1), function=lambda acc, x: acc + x, exprs=pl.col("*")
1387
- ... ).alias("sum"),
1388
- ... )
1389
- shape: (3, 1)
1390
- ┌─────┐
1391
- │ sum │
1392
- │ --- │
1393
- │ i32 │
1394
- ╞═════╡
1395
- │ 10 │
1396
- │ 13 │
1397
- │ 16 │
1398
- └─────┘
1399
-
1400
- You can also apply a condition/predicate on all columns:
1401
-
1402
- >>> df = pl.DataFrame(
1403
- ... {
1404
- ... "a": [1, 2, 3],
1405
- ... "b": [0, 1, 2],
1406
- ... }
1407
- ... )
1408
- >>> df
1409
- shape: (3, 2)
1410
- ┌─────┬─────┐
1411
- │ a ┆ b │
1412
- │ --- ┆ --- │
1413
- │ i64 ┆ i64 │
1414
- ╞═════╪═════╡
1415
- │ 1 ┆ 0 │
1416
- │ 2 ┆ 1 │
1417
- │ 3 ┆ 2 │
1418
- └─────┴─────┘
1419
-
1420
- >>> df.filter(
1421
- ... pl.fold(
1422
- ... acc=pl.lit(True),
1423
- ... function=lambda acc, x: acc & x,
1424
- ... exprs=pl.col("*") > 1,
1425
- ... )
1426
- ... )
1427
- shape: (1, 2)
1428
- ┌─────┬─────┐
1429
- │ a ┆ b │
1430
- │ --- ┆ --- │
1431
- │ i64 ┆ i64 │
1432
- ╞═════╪═════╡
1433
- │ 3 ┆ 2 │
1434
- └─────┴─────┘
1435
- """
1436
- # in case of col("*")
1437
- pyacc = parse_into_expression(acc, str_as_lit=True)
1438
- if isinstance(exprs, pl.Expr):
1439
- exprs = [exprs]
1440
-
1441
- rt: plr.PyDataTypeExpr | None = None
1442
- if return_dtype is not None:
1443
- rt = parse_into_datatype_expr(return_dtype)._pydatatype_expr
1444
-
1445
- pyexprs = parse_into_list_of_expressions(exprs)
1446
- return wrap_expr(
1447
- plr.fold(
1448
- pyacc,
1449
- _wrap_acc_lamba(function),
1450
- pyexprs,
1451
- returns_scalar=returns_scalar,
1452
- return_dtype=rt,
1453
- )
1454
- )
1455
-
1456
-
1457
- def reduce(
1458
- function: Callable[[Series, Series], Series],
1459
- exprs: Sequence[Expr | str] | Expr,
1460
- *,
1461
- returns_scalar: bool = False,
1462
- return_dtype: pl.DataTypeExpr | PolarsDataType | None = None,
1463
- ) -> Expr:
1464
- """
1465
- Accumulate over multiple columns horizontally/ row wise with a left fold.
1466
-
1467
- Parameters
1468
- ----------
1469
- function
1470
- Function to apply over the accumulator and the value.
1471
- Fn(acc, value) -> new_value
1472
- exprs
1473
- Expressions to aggregate over. May also be a wildcard expression.
1474
- returns_scalar
1475
- Whether or not `function` applied returns a scalar. This must be set correctly
1476
- by the user.
1477
- return_dtype
1478
- Output datatype.
1479
- If not set, the dtype will be inferred based on the dtype of the input
1480
- expressions.
1481
-
1482
- Notes
1483
- -----
1484
- See `fold` for the version with an explicit accumulator.
1485
-
1486
- Examples
1487
- --------
1488
- >>> df = pl.DataFrame(
1489
- ... {
1490
- ... "a": [1, 2, 3],
1491
- ... "b": [0, 1, 2],
1492
- ... }
1493
- ... )
1494
- >>> df
1495
- shape: (3, 2)
1496
- ┌─────┬─────┐
1497
- │ a ┆ b │
1498
- │ --- ┆ --- │
1499
- │ i64 ┆ i64 │
1500
- ╞═════╪═════╡
1501
- │ 1 ┆ 0 │
1502
- │ 2 ┆ 1 │
1503
- │ 3 ┆ 2 │
1504
- └─────┴─────┘
1505
-
1506
- Horizontally sum over all columns.
1507
-
1508
- >>> df.select(
1509
- ... pl.reduce(function=lambda acc, x: acc + x, exprs=pl.col("*")).alias("sum")
1510
- ... )
1511
- shape: (3, 1)
1512
- ┌─────┐
1513
- │ sum │
1514
- │ --- │
1515
- │ i64 │
1516
- ╞═════╡
1517
- │ 1 │
1518
- │ 3 │
1519
- │ 5 │
1520
- └─────┘
1521
- """
1522
- if isinstance(exprs, pl.Expr):
1523
- exprs = [exprs]
1524
-
1525
- rt: plr.PyDataTypeExpr | None = None
1526
- if return_dtype is not None:
1527
- rt = parse_into_datatype_expr(return_dtype)._pydatatype_expr
1528
-
1529
- pyexprs = parse_into_list_of_expressions(exprs)
1530
- return wrap_expr(
1531
- plr.reduce(
1532
- _wrap_acc_lamba(function),
1533
- pyexprs,
1534
- returns_scalar=returns_scalar,
1535
- return_dtype=rt,
1536
- )
1537
- )
1538
-
1539
-
1540
- def cum_fold(
1541
- acc: IntoExpr,
1542
- function: Callable[[Series, Series], Series],
1543
- exprs: Sequence[Expr | str] | Expr,
1544
- *,
1545
- returns_scalar: bool = False,
1546
- return_dtype: pl.DataTypeExpr | PolarsDataType | None = None,
1547
- include_init: bool = False,
1548
- ) -> Expr:
1549
- """
1550
- Cumulatively fold horizontally across columns with a left fold.
1551
-
1552
- Every cumulative result is added as a separate field in a Struct column.
1553
-
1554
- Parameters
1555
- ----------
1556
- acc
1557
- Accumulator expression. This is the value that will be initialized when the fold
1558
- starts. For a sum this could for instance be lit(0).
1559
- function
1560
- Function to apply over the accumulator and the value.
1561
- Fn(acc, value) -> new_value
1562
- exprs
1563
- Expressions to aggregate over. May also be a wildcard expression.
1564
- returns_scalar
1565
- Whether or not `function` applied returns a scalar. This must be set correctly
1566
- by the user.
1567
- return_dtype
1568
- Output datatype.
1569
- If not set, the dtype will be inferred based on the dtype of the accumulator.
1570
- include_init
1571
- Include the initial accumulator state as struct field.
1572
-
1573
- Notes
1574
- -----
1575
- If you simply want the first encountered expression as accumulator,
1576
- consider using :func:`cum_reduce`.
1577
-
1578
- Examples
1579
- --------
1580
- >>> df = pl.DataFrame(
1581
- ... {
1582
- ... "a": [1, 2, 3],
1583
- ... "b": [3, 4, 5],
1584
- ... "c": [5, 6, 7],
1585
- ... }
1586
- ... )
1587
- >>> df.with_columns(
1588
- ... pl.cum_fold(acc=pl.lit(1), function=lambda acc, x: acc + x, exprs=pl.all())
1589
- ... )
1590
- shape: (3, 4)
1591
- ┌─────┬─────┬─────┬───────────┐
1592
- │ a ┆ b ┆ c ┆ cum_fold │
1593
- │ --- ┆ --- ┆ --- ┆ --- │
1594
- │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
1595
- ╞═════╪═════╪═════╪═══════════╡
1596
- │ 1 ┆ 3 ┆ 5 ┆ {2,5,10} │
1597
- │ 2 ┆ 4 ┆ 6 ┆ {3,7,13} │
1598
- │ 3 ┆ 5 ┆ 7 ┆ {4,9,16} │
1599
- └─────┴─────┴─────┴───────────┘
1600
- """
1601
- # in case of col("*")
1602
- pyacc = parse_into_expression(acc, str_as_lit=True)
1603
- if isinstance(exprs, pl.Expr):
1604
- exprs = [exprs]
1605
-
1606
- rt: plr.PyDataTypeExpr | None = None
1607
- if return_dtype is not None:
1608
- rt = parse_into_datatype_expr(return_dtype)._pydatatype_expr
1609
-
1610
- pyexprs = parse_into_list_of_expressions(exprs)
1611
- return wrap_expr(
1612
- plr.cum_fold(
1613
- pyacc,
1614
- _wrap_acc_lamba(function),
1615
- pyexprs,
1616
- returns_scalar=returns_scalar,
1617
- return_dtype=rt,
1618
- include_init=include_init,
1619
- ).alias("cum_fold")
1620
- )
1621
-
1622
-
1623
- def cum_reduce(
1624
- function: Callable[[Series, Series], Series],
1625
- exprs: Sequence[Expr | str] | Expr,
1626
- *,
1627
- returns_scalar: bool = False,
1628
- return_dtype: pl.DataTypeExpr | PolarsDataType | None = None,
1629
- ) -> Expr:
1630
- """
1631
- Cumulatively reduce horizontally across columns with a left fold.
1632
-
1633
- Every cumulative result is added as a separate field in a Struct column.
1634
-
1635
- Parameters
1636
- ----------
1637
- function
1638
- Function to apply over the accumulator and the value.
1639
- Fn(acc, value) -> new_value
1640
- exprs
1641
- Expressions to aggregate over. May also be a wildcard expression.
1642
- return_dtype
1643
- Output datatype.
1644
- If not set, the dtype will be inferred based on the dtype of the input
1645
- expressions.
1646
- include_init
1647
- Include the initial accumulator state as struct field.
1648
-
1649
- Examples
1650
- --------
1651
- >>> df = pl.DataFrame(
1652
- ... {
1653
- ... "a": [1, 2, 3],
1654
- ... "b": [3, 4, 5],
1655
- ... "c": [5, 6, 7],
1656
- ... }
1657
- ... )
1658
- >>> df.with_columns(pl.cum_reduce(function=lambda acc, x: acc + x, exprs=pl.all()))
1659
- shape: (3, 4)
1660
- ┌─────┬─────┬─────┬────────────┐
1661
- │ a ┆ b ┆ c ┆ cum_reduce │
1662
- │ --- ┆ --- ┆ --- ┆ --- │
1663
- │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
1664
- ╞═════╪═════╪═════╪════════════╡
1665
- │ 1 ┆ 3 ┆ 5 ┆ {1,4,9} │
1666
- │ 2 ┆ 4 ┆ 6 ┆ {2,6,12} │
1667
- │ 3 ┆ 5 ┆ 7 ┆ {3,8,15} │
1668
- └─────┴─────┴─────┴────────────┘
1669
- """
1670
- # in case of col("*")
1671
- if isinstance(exprs, pl.Expr):
1672
- exprs = [exprs]
1673
-
1674
- rt: plr.PyDataTypeExpr | None = None
1675
- if return_dtype is not None:
1676
- rt = parse_into_datatype_expr(return_dtype)._pydatatype_expr
1677
-
1678
- pyexprs = parse_into_list_of_expressions(exprs)
1679
- return wrap_expr(
1680
- plr.cum_reduce(
1681
- _wrap_acc_lamba(function),
1682
- pyexprs,
1683
- returns_scalar=returns_scalar,
1684
- return_dtype=rt,
1685
- ).alias("cum_reduce")
1686
- )
1687
-
1688
-
1689
- def arctan2(y: str | Expr, x: str | Expr) -> Expr:
1690
- """
1691
- Compute two argument arctan in radians.
1692
-
1693
- Returns the angle (in radians) in the plane between the
1694
- positive x-axis and the ray from the origin to (x,y).
1695
-
1696
- Parameters
1697
- ----------
1698
- y
1699
- Column name or Expression.
1700
- x
1701
- Column name or Expression.
1702
-
1703
- Examples
1704
- --------
1705
- >>> c = (2**0.5) / 2
1706
- >>> df = pl.DataFrame(
1707
- ... {
1708
- ... "y": [c, -c, c, -c],
1709
- ... "x": [c, c, -c, -c],
1710
- ... }
1711
- ... )
1712
- >>> df.with_columns(pl.arctan2("y", "x").alias("atan2"))
1713
- shape: (4, 3)
1714
- ┌───────────┬───────────┬───────────┐
1715
- │ y ┆ x ┆ atan2 │
1716
- │ --- ┆ --- ┆ --- │
1717
- │ f64 ┆ f64 ┆ f64 │
1718
- ╞═══════════╪═══════════╪═══════════╡
1719
- │ 0.707107 ┆ 0.707107 ┆ 0.785398 │
1720
- │ -0.707107 ┆ 0.707107 ┆ -0.785398 │
1721
- │ 0.707107 ┆ -0.707107 ┆ 2.356194 │
1722
- │ -0.707107 ┆ -0.707107 ┆ -2.356194 │
1723
- └───────────┴───────────┴───────────┘
1724
- """
1725
- if isinstance(y, str):
1726
- y = F.col(y)
1727
- if isinstance(x, str):
1728
- x = F.col(x)
1729
- if not hasattr(x, "_pyexpr"):
1730
- msg = f"`arctan2` expected a `str` or `Expr` got a `{qualified_type_name(x)}`"
1731
- raise TypeError(msg)
1732
- if not hasattr(y, "_pyexpr"):
1733
- msg = f"`arctan2` expected a `str` or `Expr` got a `{qualified_type_name(y)}`"
1734
- raise TypeError(msg)
1735
-
1736
- return wrap_expr(plr.arctan2(y._pyexpr, x._pyexpr))
1737
-
1738
-
1739
- @deprecated("`arctan2d` is deprecated; use `arctan2` followed by `.degrees()` instead.")
1740
- def arctan2d(y: str | Expr, x: str | Expr) -> Expr:
1741
- """
1742
- Compute two argument arctan in degrees.
1743
-
1744
- .. deprecated:: 1.0.0
1745
- Use `arctan2` followed by :meth:`Expr.degrees` instead.
1746
-
1747
- Returns the angle (in degrees) in the plane between the positive x-axis
1748
- and the ray from the origin to (x,y).
1749
-
1750
- Parameters
1751
- ----------
1752
- y
1753
- Column name or Expression.
1754
- x
1755
- Column name or Expression.
1756
-
1757
- Examples
1758
- --------
1759
- >>> c = (2**0.5) / 2
1760
- >>> df = pl.DataFrame(
1761
- ... {
1762
- ... "y": [c, -c, c, -c],
1763
- ... "x": [c, c, -c, -c],
1764
- ... }
1765
- ... )
1766
- >>> df.select( # doctest: +SKIP
1767
- ... pl.arctan2d("y", "x").alias("atan2d"),
1768
- ... pl.arctan2("y", "x").alias("atan2"),
1769
- ... )
1770
- shape: (4, 2)
1771
- ┌────────┬───────────┐
1772
- │ atan2d ┆ atan2 │
1773
- │ --- ┆ --- │
1774
- │ f64 ┆ f64 │
1775
- ╞════════╪═══════════╡
1776
- │ 45.0 ┆ 0.785398 │
1777
- │ -45.0 ┆ -0.785398 │
1778
- │ 135.0 ┆ 2.356194 │
1779
- │ -135.0 ┆ -2.356194 │
1780
- └────────┴───────────┘
1781
- """
1782
- return arctan2(y, x).degrees()
1783
-
1784
-
- def exclude(
-     columns: str | PolarsDataType | Collection[str] | Collection[PolarsDataType],
-     *more_columns: str | PolarsDataType,
- ) -> Expr:
-     """
-     Represent all columns except for the given columns.
-
-     Syntactic sugar for `pl.all().exclude(columns)`.
-
-     Parameters
-     ----------
-     columns
-         The name or datatype of the column(s) to exclude. Accepts regular expression
-         input. Regular expressions should start with `^` and end with `$`.
-     *more_columns
-         Additional names or datatypes of columns to exclude, specified as positional
-         arguments.
-
-     Examples
-     --------
-     Exclude by column name(s):
-
-     >>> df = pl.DataFrame(
-     ...     {
-     ...         "aa": [1, 2, 3],
-     ...         "ba": ["a", "b", None],
-     ...         "cc": [None, 2.5, 1.5],
-     ...     }
-     ... )
-     >>> df.select(pl.exclude("ba"))
-     shape: (3, 2)
-     ┌─────┬──────┐
-     │ aa  ┆ cc   │
-     │ --- ┆ ---  │
-     │ i64 ┆ f64  │
-     ╞═════╪══════╡
-     │ 1   ┆ null │
-     │ 2   ┆ 2.5  │
-     │ 3   ┆ 1.5  │
-     └─────┴──────┘
-
-     Exclude by regex, e.g. removing all columns whose names end with the letter "a":
-
-     >>> df.select(pl.exclude("^.*a$"))
-     shape: (3, 1)
-     ┌──────┐
-     │ cc   │
-     │ ---  │
-     │ f64  │
-     ╞══════╡
-     │ null │
-     │ 2.5  │
-     │ 1.5  │
-     └──────┘
-
-     Exclude by dtype(s), e.g. removing all columns of type Int64 or Float64:
-
-     >>> df.select(pl.exclude([pl.Int64, pl.Float64]))
-     shape: (3, 1)
-     ┌──────┐
-     │ ba   │
-     │ ---  │
-     │ str  │
-     ╞══════╡
-     │ a    │
-     │ b    │
-     │ null │
-     └──────┘
-
-     """
-     return F.col("*").exclude(columns, *more_columns)
-
-
- def groups(column: str) -> Expr:
-     """Syntactic sugar for `pl.col("foo").agg_groups()`."""
-     return F.col(column).agg_groups()
-
-
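As removed here, `groups` simply forwards to `Expr.agg_groups`, which yields the row indices that make up each group. A minimal sketch with illustrative data (meaningful inside a `group_by` aggregation):

>>> import polars as pl
>>> df = pl.DataFrame({"g": ["a", "a", "b"], "v": [10, 20, 30]})
>>> # each group maps to the indices of its member rows, e.g. "a" -> [0, 1]
>>> df.group_by("g").agg(pl.groups("v")).sort("g")  # doctest: +SKIP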
- def quantile(
-     column: str,
-     quantile: float | Expr,
-     interpolation: QuantileMethod = "nearest",
- ) -> Expr:
-     """
-     Syntactic sugar for `pl.col("foo").quantile(..)`.
-
-     Parameters
-     ----------
-     column
-         Column name.
-     quantile
-         Quantile between 0.0 and 1.0.
-     interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
-         Interpolation method.
-     """
-     return F.col(column).quantile(quantile, interpolation)
-
-
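Since this is a thin wrapper over `Expr.quantile`, a one-line sketch suffices (illustrative values; `interpolation` left at its `"nearest"` default):

>>> import polars as pl
>>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df.select(pl.quantile("a", 0.5))  # median; returns 3.0 with "nearest"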
- def arg_sort_by(
-     exprs: IntoExpr | Iterable[IntoExpr],
-     *more_exprs: IntoExpr,
-     descending: bool | Sequence[bool] = False,
-     nulls_last: bool | Sequence[bool] = False,
-     multithreaded: bool = True,
-     maintain_order: bool = False,
- ) -> Expr:
-     """
-     Return the row indices that would sort the column(s).
-
-     Parameters
-     ----------
-     exprs
-         Column(s) to arg sort by. Accepts expression input. Strings are parsed as
-         column names.
-     *more_exprs
-         Additional columns to arg sort by, specified as positional arguments.
-     descending
-         Sort in descending order. When sorting by multiple columns, can be specified
-         per column by passing a sequence of booleans.
-     nulls_last
-         Place null values last.
-     multithreaded
-         Sort using multiple threads.
-     maintain_order
-         Whether the order should be maintained if elements are equal.
-
-     See Also
-     --------
-     Expr.gather : Take values by index.
-     Expr.rank : Get the rank of each row.
-
-     Examples
-     --------
-     Pass a single column name to compute the arg sort by that column.
-
-     >>> df = pl.DataFrame(
-     ...     {
-     ...         "a": [0, 1, 1, 0],
-     ...         "b": [3, 2, 3, 2],
-     ...         "c": [1, 2, 3, 4],
-     ...     }
-     ... )
-     >>> df.select(pl.arg_sort_by("a"))
-     shape: (4, 1)
-     ┌─────┐
-     │ a   │
-     │ --- │
-     │ u32 │
-     ╞═════╡
-     │ 0   │
-     │ 3   │
-     │ 1   │
-     │ 2   │
-     └─────┘
-
-     Compute the arg sort by multiple columns by either passing a list of columns, or by
-     specifying each column as a positional argument.
-
-     >>> df.select(pl.arg_sort_by(["a", "b"], descending=True))
-     shape: (4, 1)
-     ┌─────┐
-     │ a   │
-     │ --- │
-     │ u32 │
-     ╞═════╡
-     │ 2   │
-     │ 1   │
-     │ 0   │
-     │ 3   │
-     └─────┘
-
-     Use gather to apply the arg sort to other columns.
-
-     >>> df.select(pl.col("c").gather(pl.arg_sort_by("a")))
-     shape: (4, 1)
-     ┌─────┐
-     │ c   │
-     │ --- │
-     │ i64 │
-     ╞═════╡
-     │ 1   │
-     │ 4   │
-     │ 2   │
-     │ 3   │
-     └─────┘
-     """
-     exprs = parse_into_list_of_expressions(exprs, *more_exprs)
-     descending = extend_bool(descending, len(exprs), "descending", "exprs")
-     nulls_last = extend_bool(nulls_last, len(exprs), "nulls_last", "exprs")
-     return wrap_expr(
-         plr.arg_sort_by(exprs, descending, nulls_last, multithreaded, maintain_order)
-     )
-
-
- @deprecate_streaming_parameter()
- @forward_old_opt_flags()
- def collect_all(
-     lazy_frames: Iterable[LazyFrame],
-     *,
-     type_coercion: bool = True,
-     predicate_pushdown: bool = True,
-     projection_pushdown: bool = True,
-     simplify_expression: bool = True,
-     no_optimization: bool = False,
-     slice_pushdown: bool = True,
-     comm_subplan_elim: bool = True,
-     comm_subexpr_elim: bool = True,
-     cluster_with_columns: bool = True,
-     collapse_joins: bool = True,
-     optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS,
-     engine: EngineType = "auto",
- ) -> list[DataFrame]:
-     """
-     Collect multiple LazyFrames at the same time.
-
-     This can run all the computation graphs in parallel or combined into a single plan.
-
-     Common Subplan Elimination is applied to the combined plan, meaning
-     that diverging queries will run only once.
-
-     Parameters
-     ----------
-     lazy_frames
-         A list of LazyFrames to collect.
-     type_coercion
-         Do type coercion optimization.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     predicate_pushdown
-         Do predicate pushdown optimization.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     projection_pushdown
-         Do projection pushdown optimization.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     simplify_expression
-         Run simplify expressions optimization.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     no_optimization
-         Turn off optimizations.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     slice_pushdown
-         Slice pushdown optimization.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     comm_subplan_elim
-         Will try to cache branching subplans that occur on self-joins or unions.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     comm_subexpr_elim
-         Common subexpressions will be cached and reused.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     cluster_with_columns
-         Combine sequential independent calls to `with_columns`.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     collapse_joins
-         Collapse a join and filters into a faster join.
-
-         .. deprecated:: 1.30.0
-             Use the `optimizations` parameter.
-     optimizations
-         The optimization passes done during query optimization.
-
-         .. warning::
-             This functionality is considered **unstable**. It may be changed
-             at any point without it being considered a breaking change.
-     engine
-         Select the engine used to process the query, optional.
-         At the moment, if set to `"auto"` (default), the query
-         is run using the polars in-memory engine. Polars will also
-         attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
-         environment variable. If it cannot run the query using the
-         selected engine, the query is run using the polars in-memory
-         engine.
-
-         .. note::
-             The GPU engine does not support async, or running in the
-             background. If either is enabled, then GPU execution is switched off.
-
-     Returns
-     -------
-     list of DataFrames
-         The collected DataFrames, returned in the same order as the input LazyFrames.
-
-     """
-     if engine == "streaming":
-         issue_unstable_warning("streaming mode is considered unstable.")
-
-     lfs = [lf._ldf for lf in lazy_frames]
-     out = plr.collect_all(lfs, engine, optimizations._pyoptflags)
-
-     # wrap the pydataframes into dataframes
-     result = [wrap_df(pydf) for pydf in out]
-
-     return result
-
-
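A minimal sketch of the combined-plan behaviour described above, with illustrative frames; both queries share the same source, so the shared subplan is computed once:

>>> import polars as pl
>>> lf = pl.LazyFrame({"a": [1, 2, 3]})
>>> q1 = lf.select(pl.col("a").sum())
>>> q2 = lf.select(pl.col("a").mean())
>>> dfs = pl.collect_all([q1, q2])  # results come back in input order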
- @overload
- def collect_all_async(
-     lazy_frames: Iterable[LazyFrame],
-     *,
-     gevent: Literal[True],
-     engine: EngineType = "auto",
-     optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS,
- ) -> _GeventDataFrameResult[list[DataFrame]]: ...
-
-
- @overload
- def collect_all_async(
-     lazy_frames: Iterable[LazyFrame],
-     *,
-     gevent: Literal[False] = False,
-     engine: EngineType = "auto",
-     optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS,
- ) -> Awaitable[list[DataFrame]]: ...
-
-
- @unstable()
- @deprecate_streaming_parameter()
- def collect_all_async(
-     lazy_frames: Iterable[LazyFrame],
-     *,
-     gevent: bool = False,
-     engine: EngineType = "auto",
-     optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS,
- ) -> Awaitable[list[DataFrame]] | _GeventDataFrameResult[list[DataFrame]]:
-     """
-     Collect multiple LazyFrames at the same time asynchronously in a thread pool.
-
-     .. warning::
-         This functionality is considered **unstable**. It may be changed
-         at any point without it being considered a breaking change.
-
-     Collects into a list of DataFrames (like :func:`polars.collect_all`),
-     but instead of returning them directly, they are scheduled to be collected
-     inside a thread pool, while this method returns almost instantly.
-
-     May be useful if you use gevent or asyncio and want to release control to other
-     greenlets/tasks while LazyFrames are being collected.
-
-     Parameters
-     ----------
-     lazy_frames
-         A list of LazyFrames to collect.
-     gevent
-         Return a wrapper around `gevent.event.AsyncResult` instead of an Awaitable.
-     optimizations
-         The optimization passes done during query optimization.
-
-         .. warning::
-             This functionality is considered **unstable**. It may be changed
-             at any point without it being considered a breaking change.
-     engine
-         Select the engine used to process the query, optional.
-         At the moment, if set to `"auto"` (default), the query
-         is run using the polars in-memory engine. Polars will also
-         attempt to use the engine set by the `POLARS_ENGINE_AFFINITY`
-         environment variable. If it cannot run the query using the
-         selected engine, the query is run using the polars in-memory
-         engine.
-
-         .. note::
-             The GPU engine does not support async, or running in the
-             background. If either is enabled, then GPU execution is switched off.
-
-     See Also
-     --------
-     polars.collect_all : Collect multiple LazyFrames at the same time.
-     LazyFrame.collect_async : Collect a single frame.
-
-     Notes
-     -----
-     In case of an error, `set_exception` is used on the
-     `asyncio.Future`/`gevent.event.AsyncResult`, and the error is re-raised by them.
-
-     Returns
-     -------
-     If `gevent=False` (default) then returns an awaitable.
-
-     If `gevent=True` then returns a wrapper that has a
-     `.get(block=True, timeout=None)` method.
-     """
-     if engine == "streaming":
-         issue_unstable_warning("streaming mode is considered unstable.")
-
-     result: (
-         _GeventDataFrameResult[list[DataFrame]] | _AioDataFrameResult[list[DataFrame]]
-     ) = _GeventDataFrameResult() if gevent else _AioDataFrameResult()
-     lfs = [lf._ldf for lf in lazy_frames]
-     plr.collect_all_with_callback(
-         lfs, engine, optimizations._pyoptflags, result._callback_all
-     )
-     return result
-
-
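A minimal asyncio sketch of the awaitable path (`gevent=False`, the default), with illustrative frames; the API is marked unstable above, so treat the exact signature as an assumption:

import asyncio
import polars as pl

async def main() -> None:
    lf = pl.LazyFrame({"a": [1, 2, 3]})
    queries = [lf.select(pl.col("a").sum()), lf.select(pl.col("a").min())]
    # schedules collection on a thread pool and returns almost immediately
    dfs = await pl.collect_all_async(queries)
    print(dfs)

asyncio.run(main())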
- @unstable()
- def explain_all(
-     lazy_frames: Iterable[LazyFrame],
-     *,
-     optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS,
- ) -> str:
-     """
-     Explain multiple LazyFrames as if passed to `collect_all`.
-
-     Common Subplan Elimination is applied on the combined plan, meaning
-     that diverging queries will run only once.
-
-     Parameters
-     ----------
-     lazy_frames
-         A list of LazyFrames to collect.
-     optimizations
-         The optimization passes done during query optimization.
-
-         .. warning::
-             This functionality is considered **unstable**. It may be changed
-             at any point without it being considered a breaking change.
-
-     Returns
-     -------
-     Explained plan.
-     """
-     lfs = [lf._ldf for lf in lazy_frames]
-     return plr.explain_all(lfs, optimizations._pyoptflags)
-
-
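A short sketch showing how the combined plan can be inspected before collecting (illustrative queries; the printed plan format is not stable):

>>> import polars as pl
>>> lf = pl.LazyFrame({"a": [1, 2, 3]})
>>> qs = [lf.select(pl.col("a").sum()), lf.select(pl.col("a").max())]
>>> print(pl.explain_all(qs))  # doctest: +SKIP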
- @overload
- def select(
-     *exprs: IntoExpr | Iterable[IntoExpr],
-     eager: Literal[True] = ...,
-     **named_exprs: IntoExpr,
- ) -> DataFrame: ...
-
-
- @overload
- def select(
-     *exprs: IntoExpr | Iterable[IntoExpr],
-     eager: Literal[False],
-     **named_exprs: IntoExpr,
- ) -> LazyFrame: ...
-
-
- def select(
-     *exprs: IntoExpr | Iterable[IntoExpr], eager: bool = True, **named_exprs: IntoExpr
- ) -> DataFrame | LazyFrame:
-     """
-     Run polars expressions without a context.
-
-     This is syntactic sugar for running `df.select` on an empty DataFrame
-     (or LazyFrame if `eager=False`).
-
-     Parameters
-     ----------
-     *exprs
-         Column(s) to select, specified as positional arguments.
-         Accepts expression input. Strings are parsed as column names,
-         other non-expression inputs are parsed as literals.
-     eager
-         Evaluate immediately and return a `DataFrame` (default); if set to `False`,
-         return a `LazyFrame` instead.
-     **named_exprs
-         Additional columns to select, specified as keyword arguments.
-         The columns will be renamed to the keyword used.
-
-     Returns
-     -------
-     DataFrame or LazyFrame
-
-     Examples
-     --------
-     >>> foo = pl.Series("foo", [1, 2, 3])
-     >>> bar = pl.Series("bar", [3, 2, 1])
-     >>> pl.select(min=pl.min_horizontal(foo, bar))
-     shape: (3, 1)
-     ┌─────┐
-     │ min │
-     │ --- │
-     │ i64 │
-     ╞═════╡
-     │ 1   │
-     │ 2   │
-     │ 1   │
-     └─────┘
-
-     >>> pl.select(pl.int_range(0, 100_000, 2).alias("n"), eager=False).filter(
-     ...     pl.col("n") % 22_500 == 0
-     ... ).collect()
-     shape: (5, 1)
-     ┌───────┐
-     │ n     │
-     │ ---   │
-     │ i64   │
-     ╞═══════╡
-     │ 0     │
-     │ 22500 │
-     │ 45000 │
-     │ 67500 │
-     │ 90000 │
-     └───────┘
-     """
-     empty_frame = pl.DataFrame() if eager else pl.LazyFrame()
-     return empty_frame.select(*exprs, **named_exprs)
-
-
- @overload
- def arg_where(condition: Expr | Series, *, eager: Literal[False] = ...) -> Expr: ...
-
-
- @overload
- def arg_where(condition: Expr | Series, *, eager: Literal[True]) -> Series: ...
-
-
- def arg_where(condition: Expr | Series, *, eager: bool = False) -> Expr | Series:
-     """
-     Return indices where `condition` evaluates to `True`.
-
-     Parameters
-     ----------
-     condition
-         Boolean expression to evaluate.
-     eager
-         Evaluate immediately and return a `Series`; this requires that the given
-         condition is itself a `Series`. If set to `False` (default), return
-         an expression instead.
-
-     See Also
-     --------
-     Series.arg_true : Return indices where Series is True.
-
-     Examples
-     --------
-     >>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
-     >>> df.select(
-     ...     [
-     ...         pl.arg_where(pl.col("a") % 2 == 0),
-     ...     ]
-     ... ).to_series()
-     shape: (2,)
-     Series: 'a' [u32]
-     [
-             1
-             3
-     ]
-     """
-     if eager:
-         if not isinstance(condition, pl.Series):
-             msg = (
-                 "expected Series in 'arg_where' if 'eager=True', got"
-                 f" {type(condition).__name__!r}"
-             )
-             raise ValueError(msg)
-         return condition.to_frame().select(arg_where(F.col(condition.name))).to_series()
-     else:
-         condition_pyexpr = parse_into_expression(condition)
-         return wrap_expr(plr.arg_where(condition_pyexpr))
-
-
- @overload
- def coalesce(
-     exprs: IntoExpr | Iterable[IntoExpr],
-     *more_exprs: IntoExpr,
-     eager: Literal[False] = ...,
- ) -> Expr: ...
-
-
- @overload
- def coalesce(
-     exprs: IntoExpr | Iterable[IntoExpr],
-     *more_exprs: IntoExpr,
-     eager: Literal[True],
- ) -> Series: ...
-
-
- @overload
- def coalesce(
-     exprs: IntoExpr | Iterable[IntoExpr],
-     *more_exprs: IntoExpr,
-     eager: bool,
- ) -> Expr | Series: ...
-
-
- def coalesce(
-     exprs: IntoExpr | Iterable[IntoExpr],
-     *more_exprs: IntoExpr,
-     eager: bool = False,
- ) -> Expr | Series:
-     """
-     Folds the columns from left to right, keeping the first non-null value.
-
-     Parameters
-     ----------
-     exprs
-         Columns to coalesce. Accepts expression input. Strings are parsed as column
-         names, other non-expression inputs are parsed as literals.
-     *more_exprs
-         Additional columns to coalesce, specified as positional arguments.
-     eager
-         Evaluate immediately and return a `Series`; this requires that at least one
-         of the given arguments is a `Series`. If set to `False` (default), return
-         an expression instead.
-
-     Examples
-     --------
-     >>> df = pl.DataFrame(
-     ...     {
-     ...         "a": [1, None, None, None],
-     ...         "b": [1, 2, None, None],
-     ...         "c": [5, None, 3, None],
-     ...     }
-     ... )
-
-     >>> df.with_columns(pl.coalesce("a", "b", "c", 10).alias("d"))
-     shape: (4, 4)
-     ┌──────┬──────┬──────┬─────┐
-     │ a    ┆ b    ┆ c    ┆ d   │
-     │ ---  ┆ ---  ┆ ---  ┆ --- │
-     │ i64  ┆ i64  ┆ i64  ┆ i64 │
-     ╞══════╪══════╪══════╪═════╡
-     │ 1    ┆ 1    ┆ 5    ┆ 1   │
-     │ null ┆ 2    ┆ null ┆ 2   │
-     │ null ┆ null ┆ 3    ┆ 3   │
-     │ null ┆ null ┆ null ┆ 10  │
-     └──────┴──────┴──────┴─────┘
-
-     >>> df.with_columns(pl.coalesce(pl.col(["a", "b", "c"]), 10.0).alias("d"))
-     shape: (4, 4)
-     ┌──────┬──────┬──────┬──────┐
-     │ a    ┆ b    ┆ c    ┆ d    │
-     │ ---  ┆ ---  ┆ ---  ┆ ---  │
-     │ i64  ┆ i64  ┆ i64  ┆ f64  │
-     ╞══════╪══════╪══════╪══════╡
-     │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
-     │ null ┆ 2    ┆ null ┆ 2.0  │
-     │ null ┆ null ┆ 3    ┆ 3.0  │
-     │ null ┆ null ┆ null ┆ 10.0 │
-     └──────┴──────┴──────┴──────┘
-
-     >>> s1 = pl.Series("a", [None, 2, None])
-     >>> s2 = pl.Series("b", [1, None, 3])
-     >>> pl.coalesce(s1, s2, eager=True)
-     shape: (3,)
-     Series: 'a' [i64]
-     [
-             1
-             2
-             3
-     ]
-     """
-     if eager:
-         exprs = [exprs, *more_exprs]
-         if not (series := [e for e in exprs if isinstance(e, pl.Series)]):
-             msg = "expected at least one Series in 'coalesce' if 'eager=True'"
-             raise ValueError(msg)
-
-         exprs = [(e.name if isinstance(e, pl.Series) else e) for e in exprs]
-         return pl.DataFrame(series).select(coalesce(exprs, eager=False)).to_series()
-     else:
-         exprs = parse_into_list_of_expressions(exprs, *more_exprs)
-         return wrap_expr(plr.coalesce(exprs))
-
-
- @overload
- def from_epoch(column: str | Expr, time_unit: EpochTimeUnit = ...) -> Expr: ...
-
-
- @overload
- def from_epoch(
-     column: Series | Sequence[int], time_unit: EpochTimeUnit = ...
- ) -> Series: ...
-
-
- def from_epoch(
-     column: str | Expr | Series | Sequence[int], time_unit: EpochTimeUnit = "s"
- ) -> Expr | Series:
-     """
-     Utility function that parses an epoch timestamp (or Unix time) to a Polars Date(time).
-
-     Depending on the `time_unit` provided, this function will return a different dtype:
-
-     - time_unit="d" returns pl.Date
-     - time_unit="s" returns pl.Datetime["us"] (pl.Datetime's default)
-     - time_unit="ms" returns pl.Datetime["ms"]
-     - time_unit="us" returns pl.Datetime["us"]
-     - time_unit="ns" returns pl.Datetime["ns"]
-
-     Parameters
-     ----------
-     column
-         Series or expression to parse integers to pl.Datetime.
-     time_unit
-         The unit of time of the timestamps since the epoch.
-
-     Examples
-     --------
-     >>> df = pl.DataFrame({"timestamp": [1666683077, 1666683099]}).lazy()
-     >>> df.select(pl.from_epoch(pl.col("timestamp"), time_unit="s")).collect()
-     shape: (2, 1)
-     ┌─────────────────────┐
-     │ timestamp           │
-     │ ---                 │
-     │ datetime[μs]        │
-     ╞═════════════════════╡
-     │ 2022-10-25 07:31:17 │
-     │ 2022-10-25 07:31:39 │
-     └─────────────────────┘
-
-     The function can also be used in an eager context by passing a Series.
-
-     >>> s = pl.Series([12345, 12346])
-     >>> pl.from_epoch(s, time_unit="d")
-     shape: (2,)
-     Series: '' [date]
-     [
-             2003-10-20
-             2003-10-21
-     ]
-     """
-     if isinstance(column, str):
-         column = F.col(column)
-     elif not isinstance(column, (pl.Series, pl.Expr)):
-         column = pl.Series(column)  # Sequence input handled by Series constructor
-
-     if time_unit == "d":
-         return column.cast(Date)
-     elif time_unit == "s":
-         return (column.cast(Int64) * 1_000_000).cast(Datetime("us"))
-     elif time_unit in DTYPE_TEMPORAL_UNITS:
-         return column.cast(Datetime(time_unit))
-     else:
-         msg = f"`time_unit` must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got {time_unit!r}"
-         raise ValueError(msg)
-
-
- @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0")
- def rolling_cov(
-     a: str | Expr,
-     b: str | Expr,
-     *,
-     window_size: int,
-     min_samples: int | None = None,
-     ddof: int = 1,
- ) -> Expr:
-     """
-     Compute the rolling covariance between two columns/expressions.
-
-     The window at a given row includes the row itself and the
-     `window_size - 1` elements before it.
-
-     .. versionchanged:: 1.21.0
-         The `min_periods` parameter was renamed `min_samples`.
-
-     Parameters
-     ----------
-     a
-         Column name or Expression.
-     b
-         Column name or Expression.
-     window_size
-         The length of the window.
-     min_samples
-         The number of values in the window that should be non-null before computing
-         a result. If None, it will be set equal to the window size.
-     ddof
-         Delta degrees of freedom. The divisor used in calculations
-         is `N - ddof`, where `N` represents the number of elements.
-     """
-     if min_samples is None:
-         min_samples = window_size
-     if isinstance(a, str):
-         a = F.col(a)
-     if isinstance(b, str):
-         b = F.col(b)
-     return wrap_expr(
-         plr.rolling_cov(a._pyexpr, b._pyexpr, window_size, min_samples, ddof)
-     )
-
-
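`rolling_cov` carries no doctest in the removed source; a minimal sketch with illustrative data (the first `window_size - 1` rows are null because `min_samples` defaults to the window size):

>>> import polars as pl
>>> df = pl.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [1.0, 2.0, 2.0, 5.0]})
>>> df.select(pl.rolling_cov("a", "b", window_size=3))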
- @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0")
- def rolling_corr(
-     a: str | Expr,
-     b: str | Expr,
-     *,
-     window_size: int,
-     min_samples: int | None = None,
-     ddof: int = 1,
- ) -> Expr:
-     """
-     Compute the rolling correlation between two columns/expressions.
-
-     The window at a given row includes the row itself and the
-     `window_size - 1` elements before it.
-
-     .. versionchanged:: 1.21.0
-         The `min_periods` parameter was renamed `min_samples`.
-
-     Parameters
-     ----------
-     a
-         Column name or Expression.
-     b
-         Column name or Expression.
-     window_size
-         The length of the window.
-     min_samples
-         The number of values in the window that should be non-null before computing
-         a result. If None, it will be set equal to the window size.
-     ddof
-         Delta degrees of freedom. The divisor used in calculations
-         is `N - ddof`, where `N` represents the number of elements.
-     """
-     if min_samples is None:
-         min_samples = window_size
-     if isinstance(a, str):
-         a = F.col(a)
-     if isinstance(b, str):
-         b = F.col(b)
-     return wrap_expr(
-         plr.rolling_corr(a._pyexpr, b._pyexpr, window_size, min_samples, ddof)
-     )
-
-
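The same shape applies to `rolling_corr`; passing `min_samples` explicitly relaxes the null-fill at the start of the series (illustrative data):

>>> import polars as pl
>>> df = pl.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [1.0, 2.0, 2.0, 5.0]})
>>> df.select(pl.rolling_corr("a", "b", window_size=3, min_samples=2))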
- @overload
- def sql_expr(sql: str) -> Expr:  # type: ignore[overload-overlap]
-     ...
-
-
- @overload
- def sql_expr(sql: Sequence[str]) -> list[Expr]: ...
-
-
- def sql_expr(sql: str | Sequence[str]) -> Expr | list[Expr]:
-     """
-     Parse one or more SQL expressions to Polars expression(s).
-
-     Parameters
-     ----------
-     sql
-         One or more SQL expressions.
-
-     Examples
-     --------
-     Parse a single SQL expression:
-
-     >>> df = pl.DataFrame({"a": [2, 1]})
-     >>> expr = pl.sql_expr("MAX(a)")
-     >>> df.select(expr)
-     shape: (1, 1)
-     ┌─────┐
-     │ a   │
-     │ --- │
-     │ i64 │
-     ╞═════╡
-     │ 2   │
-     └─────┘
-
-     Parse multiple SQL expressions:
-
-     >>> df.with_columns(
-     ...     *pl.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"]),
-     ... )
-     shape: (2, 3)
-     ┌─────┬─────┬───────┐
-     │ a   ┆ a_a ┆ a_txt │
-     │ --- ┆ --- ┆ ---   │
-     │ i64 ┆ i64 ┆ str   │
-     ╞═════╪═════╪═══════╡
-     │ 2   ┆ 4   ┆ 2     │
-     │ 1   ┆ 1   ┆ 1     │
-     └─────┴─────┴───────┘
-     """
-     if isinstance(sql, str):
-         return wrap_expr(plr.sql_expr(sql))
-     else:
-         return [wrap_expr(plr.sql_expr(q)) for q in sql]
-
-
- @unstable()
- def row_index(name: str = "index") -> pl.Expr:
-     """
-     Generates a sequence of integers.
-
-     The length of the returned sequence will match the context length, and the
-     datatype will match the one returned by `get_index_type()`.
-
-     .. warning::
-         This functionality is considered **unstable**. It may be changed
-         at any point without it being considered a breaking change.
-
-     If you would like to generate sequences with custom offsets / length /
-     step size / datatypes, it is recommended to use `int_range` instead.
-
-     Parameters
-     ----------
-     name
-         Name of the returned column.
-
-     Returns
-     -------
-     Expr
-         Column of integers.
-
-     See Also
-     --------
-     int_range : Generate a range of integers.
-
-     Examples
-     --------
-     >>> df = pl.DataFrame({"x": ["A", "A", "B", "B", "B"]})
-     >>> df.with_columns(pl.row_index(), pl.row_index("another_index"))
-     shape: (5, 3)
-     ┌─────┬───────┬───────────────┐
-     │ x   ┆ index ┆ another_index │
-     │ --- ┆ ---   ┆ ---           │
-     │ str ┆ u32   ┆ u32           │
-     ╞═════╪═══════╪═══════════════╡
-     │ A   ┆ 0     ┆ 0             │
-     │ A   ┆ 1     ┆ 1             │
-     │ B   ┆ 2     ┆ 2             │
-     │ B   ┆ 3     ┆ 3             │
-     │ B   ┆ 4     ┆ 4             │
-     └─────┴───────┴───────────────┘
-     >>> df.group_by("x").agg(pl.row_index()).sort("x")
-     shape: (2, 2)
-     ┌─────┬───────────┐
-     │ x   ┆ index     │
-     │ --- ┆ ---       │
-     │ str ┆ list[u32] │
-     ╞═════╪═══════════╡
-     │ A   ┆ [0, 1]    │
-     │ B   ┆ [0, 1, 2] │
-     └─────┴───────────┘
-     >>> df.select(pl.row_index())
-     shape: (5, 1)
-     ┌───────┐
-     │ index │
-     │ ---   │
-     │ u32   │
-     ╞═══════╡
-     │ 0     │
-     │ 1     │
-     │ 2     │
-     │ 3     │
-     │ 4     │
-     └───────┘
-     """
-     # Notes
-     # * Dispatching to `int_range` means that we cannot accept an offset
-     #   parameter, as unlike `DataFrame.with_row_index()`, `int_range` will simply
-     #   truncate instead of raising an error.
-     return F.int_range(
-         F.len(),
-         dtype=get_index_type(),
-     ).alias(name)