polars-runtime-compat 1.34.0b3__cp39-abi3-win_arm64.whl → 1.34.0b5__cp39-abi3-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  2. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/METADATA +6 -2
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  202. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/functions/eager.py DELETED
@@ -1,524 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
- from collections.abc import Generator, Iterator, Sequence
5
- from functools import reduce
6
- from itertools import chain
7
- from typing import TYPE_CHECKING, get_args
8
-
9
- import polars._reexport as pl
10
- from polars import functions as F
11
- from polars._typing import ConcatMethod
12
- from polars._utils.various import ordered_unique, qualified_type_name
13
- from polars._utils.wrap import wrap_df, wrap_expr, wrap_ldf, wrap_s
14
- from polars.exceptions import InvalidOperationError
15
-
16
- with contextlib.suppress(ImportError): # Module not available when building docs
17
- import polars._plr as plr
18
-
19
- if TYPE_CHECKING:
20
- from collections.abc import Iterable
21
-
22
- from polars import DataFrame, Expr, LazyFrame, Series
23
- from polars._typing import FrameType, JoinStrategy, PolarsType
24
-
25
-
26
def concat(
    items: Iterable[PolarsType],
    *,
    how: ConcatMethod = "vertical",
    rechunk: bool = False,
    parallel: bool = True,
) -> PolarsType:
    """
    Combine multiple DataFrames, LazyFrames, or Series into a single object.

    Parameters
    ----------
    items
        DataFrames, LazyFrames, or Series to concatenate.
    how : {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align', 'align_full', 'align_inner', 'align_left', 'align_right'}
        Note that `Series` only support the `vertical` strategy.

        * vertical: Applies multiple `vstack` operations.
        * vertical_relaxed: Same as `vertical`, but additionally coerces columns to
          their common supertype *if* they are mismatched (eg: Int32 → Int64).
        * diagonal: Finds a union between the column schemas and fills missing column
          values with `null`.
        * diagonal_relaxed: Same as `diagonal`, but additionally coerces columns to
          their common supertype *if* they are mismatched (eg: Int32 → Int64).
        * horizontal: Stacks Series from DataFrames horizontally and fills with `null`
          if the lengths don't match.
        * align, align_full, align_inner, align_left, align_right: Combines frames
          horizontally, auto-determining the common key columns and aligning rows
          using the same logic as `align_frames` (note that "align" is an alias for
          "align_full"). The "align" strategy determines the type of join used to
          align the frames, equivalent to the "how" parameter on `align_frames`.
          Note that the common join columns are automatically coalesced, but other
          column collisions will raise an error (if you need more control over this
          you should use a suitable `join` method directly).
    rechunk
        Make sure that the result data is in contiguous memory.
    parallel
        Only relevant for LazyFrames. This determines if the concatenated
        lazy computations may be executed in parallel.

    Raises
    ------
    ValueError
        If `items` is empty, or `how` is not a valid strategy for the input type.
    TypeError
        If the first item is of an unsupported type, or an "align" strategy is
        requested for something other than DataFrames/LazyFrames.
    InvalidOperationError
        If an "align" strategy is requested but the frames share no column.

    Examples
    --------
    >>> df1 = pl.DataFrame({"a": [1], "b": [3]})
    >>> df2 = pl.DataFrame({"a": [2], "b": [4]})
    >>> pl.concat([df1, df2])  # default is 'vertical' strategy
    shape: (2, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 3   │
    │ 2   ┆ 4   │
    └─────┴─────┘

    >>> df1 = pl.DataFrame({"a": [1], "b": [3]})
    >>> df2 = pl.DataFrame({"a": [2.5], "b": [4]})
    >>> pl.concat([df1, df2], how="vertical_relaxed")  # 'a' coerced into f64
    shape: (2, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ f64 ┆ i64 │
    ╞═════╪═════╡
    │ 1.0 ┆ 3   │
    │ 2.5 ┆ 4   │
    └─────┴─────┘

    >>> df_h1 = pl.DataFrame({"l1": [1, 2], "l2": [3, 4]})
    >>> df_h2 = pl.DataFrame({"r1": [5, 6], "r2": [7, 8], "r3": [9, 10]})
    >>> pl.concat([df_h1, df_h2], how="horizontal")
    shape: (2, 5)
    ┌─────┬─────┬─────┬─────┬─────┐
    │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
    │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
    │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
    └─────┴─────┴─────┴─────┴─────┘

    The "diagonal" strategy allows for some frames to have missing columns,
    the values for which are filled with `null`:

    >>> df_d1 = pl.DataFrame({"a": [1], "b": [3]})
    >>> df_d2 = pl.DataFrame({"a": [2], "c": [4]})
    >>> pl.concat([df_d1, df_d2], how="diagonal")
    shape: (2, 3)
    ┌─────┬──────┬──────┐
    │ a   ┆ b    ┆ c    │
    │ --- ┆ ---  ┆ ---  │
    │ i64 ┆ i64  ┆ i64  │
    ╞═════╪══════╪══════╡
    │ 1   ┆ 3    ┆ null │
    │ 2   ┆ null ┆ 4    │
    └─────┴──────┴──────┘

    The "align" strategies require at least one common column to align on:

    >>> df_a1 = pl.DataFrame({"id": [1, 2], "x": [3, 4]})
    >>> df_a2 = pl.DataFrame({"id": [2, 3], "y": [5, 6]})
    >>> df_a3 = pl.DataFrame({"id": [1, 3], "z": [7, 8]})
    >>> pl.concat([df_a1, df_a2, df_a3], how="align")  # equivalent to "align_full"
    shape: (3, 4)
    ┌─────┬──────┬──────┬──────┐
    │ id  ┆ x    ┆ y    ┆ z    │
    │ --- ┆ ---  ┆ ---  ┆ ---  │
    │ i64 ┆ i64  ┆ i64  ┆ i64  │
    ╞═════╪══════╪══════╪══════╡
    │ 1   ┆ 3    ┆ null ┆ 7    │
    │ 2   ┆ 4    ┆ 5    ┆ null │
    │ 3   ┆ null ┆ 6    ┆ 8    │
    └─────┴──────┴──────┴──────┘
    >>> pl.concat([df_a1, df_a2, df_a3], how="align_left")
    shape: (2, 4)
    ┌─────┬─────┬──────┬──────┐
    │ id  ┆ x   ┆ y    ┆ z    │
    │ --- ┆ --- ┆ ---  ┆ ---  │
    │ i64 ┆ i64 ┆ i64  ┆ i64  │
    ╞═════╪═════╪══════╪══════╡
    │ 1   ┆ 3   ┆ null ┆ 7    │
    │ 2   ┆ 4   ┆ 5    ┆ null │
    └─────┴─────┴──────┴──────┘
    >>> pl.concat([df_a1, df_a2, df_a3], how="align_right")
    shape: (2, 4)
    ┌─────┬──────┬──────┬─────┐
    │ id  ┆ x    ┆ y    ┆ z   │
    │ --- ┆ ---  ┆ ---  ┆ --- │
    │ i64 ┆ i64  ┆ i64  ┆ i64 │
    ╞═════╪══════╪══════╪═════╡
    │ 1   ┆ null ┆ null ┆ 7   │
    │ 3   ┆ null ┆ 6    ┆ 8   │
    └─────┴──────┴──────┴─────┘
    >>> pl.concat([df_a1, df_a2, df_a3], how="align_inner")
    shape: (0, 4)
    ┌─────┬─────┬─────┬─────┐
    │ id  ┆ x   ┆ y   ┆ z   │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    └─────┴─────┴─────┴─────┘
    """  # noqa: W505
    # unpack/standardise (handles generator input)
    elems = list(items)

    if not elems:
        msg = "cannot concat empty list"
        raise ValueError(msg)
    elif len(elems) == 1 and isinstance(
        elems[0], (pl.DataFrame, pl.Series, pl.LazyFrame)
    ):
        # nothing to combine; hand the single item straight back
        return elems[0]

    if how.startswith("align"):
        if not isinstance(elems[0], (pl.DataFrame, pl.LazyFrame)):
            msg = f"{how!r} strategy is not supported for {qualified_type_name(elems[0])!r}"
            raise TypeError(msg)

        # resolve every schema exactly once; both the output column order and
        # the common key columns are derived from this single pass
        schemas = [e.collect_schema() for e in elems]
        column_sets = [set(schema) for schema in schemas]

        # all columns, in the order in which they first appear across the inputs
        output_column_order = list(
            ordered_unique(list(chain.from_iterable(schemas)))
        )
        # key columns are those present in *every* input (same first-seen order)
        common_cols = [
            c for c in output_column_order if all(c in cols for cols in column_sets)
        ]
        # we require at least one key column for 'align' strategies
        if not common_cols:
            msg = f"{how!r} strategy requires at least one common column"
            raise InvalidOperationError(msg)

        # align frame data using a join, with no suffix-resolution (will raise
        # a DuplicateError in case of column collision, same as "horizontal")
        join_method: JoinStrategy = (
            "full" if how == "align" else how.removeprefix("align_")  # type: ignore[assignment]
        )
        lf: LazyFrame = (
            reduce(
                lambda x, y: (
                    x.join(
                        y,
                        on=common_cols,
                        how=join_method,
                        maintain_order="right_left",
                        coalesce=True,
                    )
                ),
                [df.lazy() for df in elems],
            )
            .sort(by=common_cols, maintain_order=True)
            .select(*output_column_order)
        )
        # the result mirrors the first input: eager in, eager out
        eager = isinstance(elems[0], pl.DataFrame)
        return lf.collect() if eager else lf  # type: ignore[return-value]

    out: Series | DataFrame | LazyFrame | Expr
    first = elems[0]

    from polars.lazyframe.opt_flags import QueryOptFlags

    if isinstance(first, pl.DataFrame):
        if how == "vertical":
            out = wrap_df(plr.concat_df(elems))
        elif how == "vertical_relaxed":
            # supertype coercion is routed through the lazy concat, then collected
            out = wrap_ldf(
                plr.concat_lf(
                    [df.lazy() for df in elems],
                    rechunk=rechunk,
                    parallel=parallel,
                    to_supertypes=True,
                )
            ).collect(optimizations=QueryOptFlags._eager())

        elif how == "diagonal":
            out = wrap_df(plr.concat_df_diagonal(elems))
        elif how == "diagonal_relaxed":
            out = wrap_ldf(
                plr.concat_lf_diagonal(
                    [df.lazy() for df in elems],
                    rechunk=rechunk,
                    parallel=parallel,
                    to_supertypes=True,
                )
            ).collect(optimizations=QueryOptFlags._eager())
        elif how == "horizontal":
            out = wrap_df(plr.concat_df_horizontal(elems))
        else:
            allowed = ", ".join(repr(m) for m in get_args(ConcatMethod))
            msg = f"DataFrame `how` must be one of {{{allowed}}}, got {how!r}"
            raise ValueError(msg)

    elif isinstance(first, pl.LazyFrame):
        # lazy results are returned directly; `rechunk` is passed into the plan
        if how in ("vertical", "vertical_relaxed"):
            return wrap_ldf(
                plr.concat_lf(
                    elems,
                    rechunk=rechunk,
                    parallel=parallel,
                    to_supertypes=how.endswith("relaxed"),
                )
            )
        elif how in ("diagonal", "diagonal_relaxed"):
            return wrap_ldf(
                plr.concat_lf_diagonal(
                    elems,
                    rechunk=rechunk,
                    parallel=parallel,
                    to_supertypes=how.endswith("relaxed"),
                )
            )
        elif how == "horizontal":
            return wrap_ldf(
                plr.concat_lf_horizontal(
                    elems,
                    parallel=parallel,
                )
            )
        else:
            allowed = ", ".join(repr(m) for m in get_args(ConcatMethod))
            msg = f"LazyFrame `how` must be one of {{{allowed}}}, got {how!r}"
            raise ValueError(msg)

    elif isinstance(first, pl.Series):
        if how == "vertical":
            out = wrap_s(plr.concat_series(elems))
        else:
            msg = "Series only supports 'vertical' concat strategy"
            raise ValueError(msg)

    elif isinstance(first, pl.Expr):
        return wrap_expr(plr.concat_expr([e._pyexpr for e in elems], rechunk))
    else:
        msg = f"did not expect type: {qualified_type_name(first)!r} in `concat`"
        raise TypeError(msg)

    if rechunk:
        return out.rechunk()
    return out
307
-
308
-
309
def _alignment_join(
    *idx_frames: tuple[int, LazyFrame],
    align_on: list[str],
    how: JoinStrategy = "full",
    descending: bool | Sequence[bool] = False,
) -> LazyFrame:
    """
    Fold all indexed frames into one frame whose rows are aligned on the key columns.

    Each `(index, frame)` pair is joined onto the running result using `how`;
    colliding non-key columns from the right-hand frame receive the suffix
    `":<index>"`. The joined frame is then sorted by `align_on`.
    """
    from polars.lazyframe import QueryOptFlags

    def _fold(
        acc: tuple[int, LazyFrame],
        nxt: tuple[int, LazyFrame],
    ) -> tuple[int, LazyFrame]:
        # only the accumulated frame matters on the left; the index that is
        # carried forward (and used for the suffix) is the right-hand one
        right_idx, right = nxt
        merged = acc[1].join(
            right,
            how=how,
            on=align_on,
            suffix=f":{right_idx}",
            nulls_equal=True,
            coalesce=True,
            maintain_order="right_left",
        )
        return right_idx, merged

    _, combined = reduce(_fold, idx_frames)
    aligned = combined.sort(by=align_on, descending=descending, maintain_order=True)

    # note: can stack overflow if the join becomes too large, so we
    # collect eagerly when hitting a large enough number of frames
    if len(idx_frames) >= 250:
        aligned = aligned.collect(optimizations=QueryOptFlags.none()).lazy()
    return aligned
343
-
344
-
345
def align_frames(
    *frames: FrameType | Iterable[FrameType],
    on: str | Expr | Sequence[str] | Sequence[Expr] | Sequence[str | Expr],
    how: JoinStrategy = "full",
    select: str | Expr | Sequence[str | Expr] | None = None,
    descending: bool | Sequence[bool] = False,
) -> list[FrameType]:
    r"""
    Align a sequence of frames using common values from one or more columns as a key.

    Frames that do not contain the given key values have rows injected (with nulls
    filling the non-key columns), and each resulting frame is sorted by the key.

    The original column order of input frames is not changed unless `select` is
    specified (in which case the final column order is determined from that). In the
    case where duplicate key values exist, the alignment behaviour is determined by
    the given alignment strategy specified in the `how` parameter (by default this
    is a full outer join, but if your data is suitable you can get a large speedup
    by setting `how="left"` instead).

    Note that this function does not result in a joined frame - you receive the same
    number of frames back that you passed in, but each is now aligned by key and has
    the same number of rows.

    Parameters
    ----------
    frames
        Sequence of DataFrames or LazyFrames; either passed as separate positional
        arguments, or as a single iterable of frames.
    on
        One or more columns whose unique values will be used to align the frames.
    how
        By default the row alignment values are determined using a full outer join
        strategy across all frames; if you know that the first frame contains all
        required keys, you can set `how="left"` for a large performance increase.
    select
        Optional post-alignment column select to constrain and/or order
        the columns returned from the newly aligned frames.
    descending
        Sort the alignment column values in descending order; can be a single
        boolean or a list of booleans associated with each column in `on`.

    Returns
    -------
    list
        One aligned frame per input frame. The frames are collected when the
        inputs are DataFrames, and left lazy otherwise. An empty input gives
        an empty list.

    Raises
    ------
    TypeError
        If the input frames are not all of the same type.

    Examples
    --------
    >>> from datetime import date
    >>> df1 = pl.DataFrame(
    ...     {
    ...         "dt": [date(2022, 9, 1), date(2022, 9, 2), date(2022, 9, 3)],
    ...         "x": [3.5, 4.0, 1.0],
    ...         "y": [10.0, 2.5, 1.5],
    ...     }
    ... )
    >>> df2 = pl.DataFrame(
    ...     {
    ...         "dt": [date(2022, 9, 2), date(2022, 9, 3), date(2022, 9, 1)],
    ...         "x": [8.0, 1.0, 3.5],
    ...         "y": [1.5, 12.0, 5.0],
    ...     }
    ... )
    >>> df3 = pl.DataFrame(
    ...     {
    ...         "dt": [date(2022, 9, 3), date(2022, 9, 2)],
    ...         "x": [2.0, 5.0],
    ...         "y": [2.5, 2.0],
    ...     }
    ... )  # doctest: +IGNORE_RESULT
    >>> pl.Config.set_tbl_formatting("UTF8_FULL")  # doctest: +IGNORE_RESULT
    #
    # df1                              df2                              df3
    # shape: (3, 3)                    shape: (3, 3)                    shape: (2, 3)
    # ┌────────────┬─────┬──────┐      ┌────────────┬─────┬──────┐      ┌────────────┬─────┬─────┐
    # │ dt         ┆ x   ┆ y    │      │ dt         ┆ x   ┆ y    │      │ dt         ┆ x   ┆ y   │
    # │ ---        ┆ --- ┆ ---  │      │ ---        ┆ --- ┆ ---  │      │ ---        ┆ --- ┆ --- │
    # │ date       ┆ f64 ┆ f64  │      │ date       ┆ f64 ┆ f64  │      │ date       ┆ f64 ┆ f64 │
    # ╞════════════╪═════╪══════╡      ╞════════════╪═════╪══════╡      ╞════════════╪═════╪═════╡
    # │ 2022-09-01 ┆ 3.5 ┆ 10.0 │\  ,->│ 2022-09-02 ┆ 8.0 ┆ 1.5  │\  ,->│ 2022-09-03 ┆ 2.0 ┆ 2.5 │
    # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ \/   ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤ \/   ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
    # │ 2022-09-02 ┆ 4.0 ┆ 2.5  │_/\,->│ 2022-09-03 ┆ 1.0 ┆ 12.0 │_/`-->│ 2022-09-02 ┆ 5.0 ┆ 2.0 │
    # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤  /\  ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤      └────────────┴─────┴─────┘
    # │ 2022-09-03 ┆ 1.0 ┆ 1.5  │_/  `>│ 2022-09-01 ┆ 3.5 ┆ 5.0  │-//-
    # └────────────┴─────┴──────┘      └────────────┴─────┴──────┘
    ...

    Align frames by the "dt" column:

    >>> af1, af2, af3 = pl.align_frames(
    ...     df1, df2, df3, on="dt"
    ... )  # doctest: +IGNORE_RESULT
    #
    # af1                              af2                              af3
    # shape: (3, 3)                    shape: (3, 3)                    shape: (3, 3)
    # ┌────────────┬─────┬──────┐      ┌────────────┬─────┬──────┐      ┌────────────┬──────┬──────┐
    # │ dt         ┆ x   ┆ y    │      │ dt         ┆ x   ┆ y    │      │ dt         ┆ x    ┆ y    │
    # │ ---        ┆ --- ┆ ---  │      │ ---        ┆ --- ┆ ---  │      │ ---        ┆ ---  ┆ ---  │
    # │ date       ┆ f64 ┆ f64  │      │ date       ┆ f64 ┆ f64  │      │ date       ┆ f64  ┆ f64  │
    # ╞════════════╪═════╪══════╡      ╞════════════╪═════╪══════╡      ╞════════════╪══════╪══════╡
    # │ 2022-09-01 ┆ 3.5 ┆ 10.0 │----->│ 2022-09-01 ┆ 3.5 ┆ 5.0  │----->│ 2022-09-01 ┆ null ┆ null │
    # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    # │ 2022-09-02 ┆ 4.0 ┆ 2.5  │----->│ 2022-09-02 ┆ 8.0 ┆ 1.5  │----->│ 2022-09-02 ┆ 5.0  ┆ 2.0  │
    # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    # │ 2022-09-03 ┆ 1.0 ┆ 1.5  │----->│ 2022-09-03 ┆ 1.0 ┆ 12.0 │----->│ 2022-09-03 ┆ 2.0  ┆ 2.5  │
    # └────────────┴─────┴──────┘      └────────────┴─────┴──────┘      └────────────┴──────┴──────┘
    ...

    Align frames by "dt" using "left" alignment, but keep only cols "x" and "y":

    >>> af1, af2, af3 = pl.align_frames(
    ...     df1, df2, df3, on="dt", select=["x", "y"], how="left"
    ... )  # doctest: +IGNORE_RESULT
    #
    # af1                 af2                 af3
    # shape: (3, 2)       shape: (3, 2)       shape: (3, 2)
    # ┌─────┬──────┐      ┌─────┬──────┐      ┌──────┬──────┐
    # │ x   ┆ y    │      │ x   ┆ y    │      │ x    ┆ y    │
    # │ --- ┆ ---  │      │ --- ┆ ---  │      │ ---  ┆ ---  │
    # │ f64 ┆ f64  │      │ f64 ┆ f64  │      │ f64  ┆ f64  │
    # ╞═════╪══════╡      ╞═════╪══════╡      ╞══════╪══════╡
    # │ 3.5 ┆ 10.0 │      │ 3.5 ┆ 5.0  │      │ null ┆ null │
    # ├╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    # │ 4.0 ┆ 2.5  │      │ 8.0 ┆ 1.5  │      │ 5.0  ┆ 2.0  │
    # ├╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌┼╌╌╌╌╌╌┤      ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    # │ 1.0 ┆ 1.5  │      │ 1.0 ┆ 12.0 │      │ 2.0  ┆ 2.5  │
    # └─────┴──────┘      └─────┴──────┘      └──────┴──────┘
    ...

    Now data is aligned, and you can easily calculate the row-wise dot product:

    >>> (af1 * af2 * af3).fill_null(0).select(pl.sum_horizontal("*").alias("dot"))
    shape: (3, 1)
    ┌───────┐
    │ dot   │
    │ ---   │
    │ f64   │
    ╞═══════╡
    │ 0.0   │
    ├╌╌╌╌╌╌╌┤
    │ 167.5 │
    ├╌╌╌╌╌╌╌┤
    │ 47.0  │
    └───────┘
    """  # noqa: W505
    if not frames:
        return []

    if len(frames) == 1 and not isinstance(frames[0], (pl.DataFrame, pl.LazyFrame)):
        # a single non-frame argument is taken to be an iterable of frames
        frames = frames[0]  # type: ignore[assignment]
        # materialise anything that cannot be indexed/re-iterated (generators,
        # iterators, sets, ...) since the frames are traversed more than once
        if not isinstance(frames, Sequence):
            frames = tuple(frames)
        # an empty iterable behaves the same as passing no frames at all
        if not frames:
            return []

    if len({type(f) for f in frames}) != 1:
        msg = (
            "input frames must be of a consistent type (all LazyFrame or all DataFrame)"
        )
        raise TypeError(msg)

    eager = isinstance(frames[0], pl.DataFrame)
    # normalise `on` to a list, then reduce expressions to their output names
    on = [on] if (isinstance(on, str) or not isinstance(on, Sequence)) else on
    align_on = [(c.meta.output_name() if isinstance(c, pl.Expr) else c) for c in on]

    # create aligned master frame (this is the most expensive part; after
    # we just select out the columns representing the component frames)
    idx_frames = [(idx, frame.lazy()) for idx, frame in enumerate(frames)]  # type: ignore[union-attr]
    alignment_frame = _alignment_join(
        *idx_frames, align_on=align_on, how=how, descending=descending
    )

    # select-out aligned components from the master frame
    aligned_cols = set(alignment_frame.collect_schema())
    aligned_frames = []
    for idx, lf in idx_frames:
        # columns that collided during the join carry this frame's ":<idx>"
        # suffix; map them back onto their original names
        sfx = f":{idx}"
        df_cols = [
            F.col(f"{c}{sfx}").alias(c) if f"{c}{sfx}" in aligned_cols else F.col(c)
            for c in lf.collect_schema()
        ]
        f = alignment_frame.select(*df_cols)
        if select is not None:
            f = f.select(select)
        aligned_frames.append(f)

    return F.collect_all(aligned_frames) if eager else aligned_frames  # type: ignore[return-value]
@@ -1,29 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import contextlib
4
-
5
- from polars._utils.various import qualified_type_name
6
-
7
- with contextlib.suppress(ImportError): # Module not available when building docs
8
- import polars._plr as plr
9
- import polars._reexport as pl
10
-
11
-
12
def escape_regex(s: str) -> str:
    r"""
    Escape the regex meta characters in a string.

    Parameters
    ----------
    s
        The string whose meta characters will be escaped.

    Raises
    ------
    TypeError
        If `s` is an `Expr`, or is any other non-`str` value.
    """
    # expressions get a dedicated hint pointing at the expression namespace
    if isinstance(s, pl.Expr):
        err = "escape_regex function is unsupported for `Expr`, you may want use `Expr.str.escape_regex` instead"
        raise TypeError(err)

    if not isinstance(s, str):
        err = f"escape_regex function supports only `str` type, got `{qualified_type_name(s)}`"
        raise TypeError(err)

    escaped = plr.escape_regex(s)
    return escaped