polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,782 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import os
5
+ import re
6
+ import sys
7
+ import warnings
8
+ from collections import Counter
9
+ from collections.abc import (
10
+ Collection,
11
+ Generator,
12
+ Iterable,
13
+ MappingView,
14
+ Sequence,
15
+ Sized,
16
+ )
17
+ from enum import Enum
18
+ from io import BytesIO
19
+ from pathlib import Path
20
+ from typing import (
21
+ TYPE_CHECKING,
22
+ Any,
23
+ Callable,
24
+ Literal,
25
+ TypeVar,
26
+ overload,
27
+ )
28
+
29
+ import polars as pl
30
+ from polars import functions as F
31
+ from polars._dependencies import _check_for_numpy, import_optional, subprocess
32
+ from polars._dependencies import numpy as np
33
+ from polars.datatypes import (
34
+ Boolean,
35
+ Date,
36
+ Datetime,
37
+ Decimal,
38
+ Duration,
39
+ Int64,
40
+ String,
41
+ Time,
42
+ )
43
+ from polars.datatypes.group import FLOAT_DTYPES, INTEGER_DTYPES
44
+
45
# typing-only imports: these names are used exclusively in annotations
if TYPE_CHECKING:
    from collections.abc import Iterator, MutableMapping, Reversible

    from polars import DataFrame, Expr
    from polars._typing import PolarsDataType, SizeUnit

    # `TypeIs` graduated from typing_extensions to `typing` in Python 3.13
    if sys.version_info >= (3, 13):
        from typing import TypeIs
    else:
        from typing_extensions import TypeIs

# `ParamSpec`/`TypeGuard` graduated to `typing` in Python 3.10
if sys.version_info >= (3, 10):
    from typing import ParamSpec, TypeGuard
else:
    from typing_extensions import ParamSpec, TypeGuard

P = ParamSpec("P")  # generic parameter-spec (decorator helpers)
T = TypeVar("T")  # generic type variable

# note: reversed views don't match as instances of MappingView
if sys.version_info >= (3, 11):
    # capture the concrete types of reversed dict views so `_is_generator`
    # (below) can recognise them as lazily-evaluated iterables
    _views: list[Reversible[Any]] = [{}.keys(), {}.values(), {}.items()]
    _reverse_mapping_views = tuple(type(reversed(view)) for view in _views)
68
+
69
+
70
+ def _process_null_values(
71
+ null_values: None | str | Sequence[str] | dict[str, str] = None,
72
+ ) -> None | str | Sequence[str] | list[tuple[str, str]]:
73
+ if isinstance(null_values, dict):
74
+ return list(null_values.items())
75
+ else:
76
+ return null_values
77
+
78
+
79
def _is_generator(val: object | Iterator[T]) -> TypeIs[Iterator[T]]:
    """Return True if `val` is a lazily-evaluated, unsized iterable."""
    if isinstance(val, MappingView):
        return True
    if isinstance(val, (Generator, Iterable)) and not isinstance(val, Sized):
        return True
    # reversed dict views are not MappingView instances; detect them explicitly
    return sys.version_info >= (3, 11) and isinstance(val, _reverse_mapping_views)
85
+
86
+
87
+ def _is_iterable_of(val: Iterable[object], eltype: type | tuple[type, ...]) -> bool:
88
+ """Check whether the given iterable is of the given type(s)."""
89
+ return all(isinstance(x, eltype) for x in val)
90
+
91
+
92
def is_path_or_str_sequence(
    val: object, *, allow_str: bool = False, include_series: bool = False
) -> TypeGuard[Sequence[str | Path]]:
    """
    Check that `val` is a sequence of strings or paths.

    Note that a single string is a sequence of strings by definition, use
    `allow_str=False` to return False on a single string.
    """
    if allow_str is False and isinstance(val, str):
        return False
    if _check_for_numpy(val) and isinstance(val, np.ndarray):
        return np.issubdtype(val.dtype, np.str_)
    if include_series and isinstance(val, pl.Series):
        return val.dtype == pl.String
    if isinstance(val, bytes) or not isinstance(val, Sequence):
        return False
    return _is_iterable_of(val, (Path, str))
112
+
113
+
114
def is_bool_sequence(
    val: object, *, include_series: bool = False
) -> TypeGuard[Sequence[bool]]:
    """Check whether the given sequence is a sequence of booleans."""
    if _check_for_numpy(val) and isinstance(val, np.ndarray):
        return val.dtype == np.bool_
    if include_series and isinstance(val, pl.Series):
        return val.dtype == pl.Boolean
    if not isinstance(val, Sequence):
        return False
    return _is_iterable_of(val, bool)
123
+
124
+
125
def is_int_sequence(
    val: object, *, include_series: bool = False
) -> TypeGuard[Sequence[int]]:
    """Check whether the given sequence is a sequence of integers."""
    if _check_for_numpy(val) and isinstance(val, np.ndarray):
        return np.issubdtype(val.dtype, np.integer)
    if include_series and isinstance(val, pl.Series):
        return val.dtype.is_integer()
    if not isinstance(val, Sequence):
        return False
    return _is_iterable_of(val, int)
134
+
135
+
136
def is_sequence(
    val: object, *, include_series: bool = False
) -> TypeGuard[Sequence[Any]]:
    """Check whether the given input is a numpy array or python sequence."""
    if _check_for_numpy(val) and isinstance(val, np.ndarray):
        return True
    # a plain string is a Sequence too, but is deliberately excluded here
    if isinstance(val, str):
        return False
    allowed = (pl.Series, Sequence) if include_series else Sequence
    return isinstance(val, allowed)
144
+
145
+
146
def is_str_sequence(
    val: object, *, allow_str: bool = False, include_series: bool = False
) -> TypeGuard[Sequence[str]]:
    """
    Check that `val` is a sequence of strings.

    Note that a single string is a sequence of strings by definition, use
    `allow_str=False` to return False on a single string.
    """
    if allow_str is False and isinstance(val, str):
        return False
    if _check_for_numpy(val) and isinstance(val, np.ndarray):
        return np.issubdtype(val.dtype, np.str_)
    if include_series and isinstance(val, pl.Series):
        return val.dtype == pl.String
    if not isinstance(val, Sequence):
        return False
    return _is_iterable_of(val, str)
162
+
163
+
164
def is_column(obj: Any) -> bool:
    """Indicate if the given object is a basic/unaliased column."""
    from polars.expr import Expr

    if not isinstance(obj, Expr):
        return False
    return obj.meta.is_column()
169
+
170
+
171
def warn_null_comparison(obj: Any) -> None:
    """Warn for possibly unintentional comparisons with None."""
    if obj is not None:
        return
    warnings.warn(
        "Comparisons with None always result in null. Consider using `.is_null()` or `.is_not_null()`.",
        UserWarning,
        stacklevel=find_stacklevel(),
    )
179
+
180
+
181
def range_to_series(
    name: str, rng: range, dtype: PolarsDataType | None = None
) -> pl.Series:
    """
    Fast conversion of the given range to a Series.

    Parameters
    ----------
    name
        Name to assign to the resulting Series.
    rng
        Python `range` supplying start/stop/step.
    dtype
        Target dtype; defaults to Int64.
    """
    dtype = dtype or Int64
    # fix: the local result was previously named `range`, shadowing the builtin
    if dtype.is_integer():
        # integer targets can be produced directly by int_range
        series = F.int_range(  # type: ignore[call-overload]
            start=rng.start, end=rng.stop, step=rng.step, dtype=dtype, eager=True
        )
    else:
        # non-integer targets: generate integers first, then cast
        series = F.int_range(
            start=rng.start, end=rng.stop, step=rng.step, eager=True
        ).cast(dtype)
    return series.alias(name)
195
+
196
+
197
def range_to_slice(rng: range) -> slice:
    """Return the given range as an equivalent slice."""
    start, stop, step = rng.start, rng.stop, rng.step
    return slice(start, stop, step)
200
+
201
+
202
+ def _in_notebook() -> bool:
203
+ try:
204
+ from IPython import get_ipython
205
+
206
+ if "IPKernelApp" not in get_ipython().config: # pragma: no cover
207
+ return False
208
+ except ImportError:
209
+ return False
210
+ except AttributeError:
211
+ return False
212
+ return True
213
+
214
+
215
+ def _in_marimo_notebook() -> bool:
216
+ try:
217
+ import marimo as mo
218
+
219
+ return mo.running_in_notebook() # pragma: no cover
220
+ except ImportError:
221
+ return False
222
+
223
+
224
def arrlen(obj: Any) -> int | None:
    """Return length of (non-string/dict) sequence; returns None for non-sequences."""
    if isinstance(obj, (str, dict)):
        return None
    try:
        return len(obj)
    except TypeError:
        # object has no length (e.g. int, generator)
        return None
230
+
231
+
232
def normalize_filepath(path: str | Path, *, check_not_directory: bool = True) -> str:
    """
    Create a string path, expanding the home directory if present.

    Parameters
    ----------
    path
        The file path (string or Path) to normalize.
    check_not_directory
        If True (default), raise `IsADirectoryError` when `path` refers to an
        existing directory.
    """
    # don't use pathlib here as it modifies slashes (s3:// -> s3:/)
    path = os.path.expanduser(path)  # noqa: PTH111
    # note: isdir() already returns False for non-existent paths, so the
    # previous extra exists() check was redundant
    if check_not_directory and os.path.isdir(path):  # noqa: PTH112
        msg = f"expected a file path; {path!r} is a directory"
        raise IsADirectoryError(msg)
    return path
244
+
245
+
246
def parse_version(version: Sequence[str | int]) -> tuple[int, ...]:
    """Simple version parser; split into a tuple of ints for comparison."""
    parts = version.split(".") if isinstance(version, str) else version
    # strip any non-digit characters (e.g. "22b" -> 22) before converting
    return tuple(int(re.sub(r"\D", "", str(part))) for part in parts)
251
+
252
+
253
def ordered_unique(values: Sequence[Any]) -> list[Any]:
    """Return unique list of sequence values, maintaining their order of appearance."""
    # dicts preserve insertion order and de-duplicate keys in a single pass
    return list(dict.fromkeys(values))
258
+
259
+
260
def deduplicate_names(names: Iterable[str]) -> list[str]:
    """Ensure name uniqueness by appending a counter to subsequent duplicates."""
    occurrences: MutableMapping[str, int] = Counter()
    result: list[str] = []
    for name in names:
        if name in occurrences:
            # second occurrence gets suffix 0, third gets 1, etc.
            result.append(f"{name}{occurrences[name] - 1}")
        else:
            result.append(name)
        occurrences[name] += 1
    return result
268
+
269
+
270
@overload
def scale_bytes(sz: int, unit: SizeUnit) -> int | float: ...


@overload
def scale_bytes(sz: Expr, unit: SizeUnit) -> Expr: ...


def scale_bytes(sz: int | Expr, unit: SizeUnit) -> int | float | Expr:
    """Scale size in bytes to other size units (eg: "kb", "mb", "gb", "tb")."""
    # map each accepted unit spelling to its 1024-power
    exponents = {
        "b": 0,
        "bytes": 0,
        "kb": 1,
        "kilobytes": 1,
        "mb": 2,
        "megabytes": 2,
        "gb": 3,
        "gigabytes": 3,
        "tb": 4,
        "terabytes": 4,
    }
    if unit not in exponents:
        msg = f"`unit` must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}"
        raise ValueError(msg)
    power = exponents[unit]
    # bytes are returned unchanged (keeps ints as ints)
    return sz if power == 0 else sz / 1024**power
293
+
294
+
295
def _cast_repr_strings_with_schema(
    df: DataFrame, schema: dict[str, PolarsDataType | None]
) -> DataFrame:
    """
    Utility function to cast table repr/string values into frame-native types.

    Parameters
    ----------
    df
        Dataframe containing string-repr column data.
    schema
        DataFrame schema containing the desired end-state types.

    Notes
    -----
    Table repr strings are less strict (or different) than equivalent CSV data, so need
    special handling; as this function is only used for reprs, parsing is flexible.
    """
    tp: PolarsDataType | None
    # precondition: every incoming column must already be String repr data
    if not df.is_empty():
        for tp in df.schema.values():
            if tp != String:
                msg = f"DataFrame should contain only String repr data; found {tp!r}"
                raise TypeError(msg)

    special_floats = {"-inf", "+inf", "inf", "nan"}

    # duration string scaling
    ns_sec = 1_000_000_000
    duration_scaling = {
        "ns": 1,
        "us": 1_000,
        "µs": 1_000,
        "ms": 1_000_000,
        "s": ns_sec,
        "m": ns_sec * 60,
        "h": ns_sec * 60 * 60,
        "d": ns_sec * 3_600 * 24,
        "w": ns_sec * 3_600 * 24 * 7,
    }

    # identify duration units and convert to nanoseconds
    def str_duration_(td: str | None) -> int | None:
        # e.g. "1h 30m" -> sum of each (value, unit) component in ns
        return (
            None
            if td is None
            else sum(
                int(value) * duration_scaling[unit.strip()]
                for value, unit in re.findall(r"([+-]?\d+)(\D+)", td)
            )
        )

    # build one cast expression per column that needs conversion
    cast_cols = {}
    for c, tp in schema.items():
        if tp is not None:
            if tp.base_type() == Datetime:
                # strip any trailing timezone abbreviation, pad/truncate the
                # fractional seconds to 9 digits, then parse as datetime
                tp_base = Datetime(tp.time_unit)  # type: ignore[union-attr]
                d = F.col(c).str.replace(r"[A-Z ]+$", "")
                cast_cols[c] = (
                    F.when(d.str.len_bytes() == 19)
                    .then(d + ".000000000")
                    .otherwise(d + "000000000")
                    .str.slice(0, 29)
                    .str.strptime(tp_base, "%Y-%m-%d %H:%M:%S.%9f")
                )
                if getattr(tp, "time_zone", None) is not None:
                    cast_cols[c] = cast_cols[c].dt.replace_time_zone(tp.time_zone)  # type: ignore[union-attr]
            elif tp == Date:
                cast_cols[c] = F.col(c).str.strptime(tp, "%Y-%m-%d")  # type: ignore[arg-type]
            elif tp == Time:
                # pad/truncate fractional seconds to 9 digits before parsing
                cast_cols[c] = (
                    F.when(F.col(c).str.len_bytes() == 8)
                    .then(F.col(c) + ".000000000")
                    .otherwise(F.col(c) + "000000000")
                    .str.slice(0, 18)
                    .str.strptime(tp, "%H:%M:%S.%9f")  # type: ignore[arg-type]
                )
            elif tp == Duration:
                # parse via the python helper above, then cast ns -> target unit
                cast_cols[c] = (
                    F.col(c)
                    .map_elements(str_duration_, return_dtype=Int64)
                    .cast(Duration("ns"))
                    .cast(tp)
                )
            elif tp == Boolean:
                cast_cols[c] = F.col(c).replace_strict({"true": True, "false": False})
            elif tp in INTEGER_DTYPES:
                # drop digit-group separators etc., keeping only digits and sign
                int_string = F.col(c).str.replace_all(r"[^\d+-]", "")
                cast_cols[c] = (
                    pl.when(int_string.str.len_bytes() > 0).then(int_string).cast(tp)
                )
            elif tp in FLOAT_DTYPES or tp.base_type() == Decimal:
                # identify integer/fractional parts
                integer_part = F.col(c).str.replace(r"^(.*)\D(\d*)$", "$1")
                fractional_part = F.col(c).str.replace(r"^(.*)\D(\d*)$", "$2")
                cast_cols[c] = (
                    # check for empty string, special floats, or integer format
                    pl.when(
                        F.col(c).str.contains(r"^[+-]?\d*$")
                        | F.col(c).str.to_lowercase().is_in(special_floats)
                    )
                    .then(pl.when(F.col(c).str.len_bytes() > 0).then(F.col(c)))
                    # check for scientific notation
                    .when(F.col(c).str.contains("[eE]"))
                    .then(F.col(c).str.replace(r"[^eE\d]", "."))
                    .otherwise(
                        # recombine sanitised integer/fractional components
                        pl.concat_str(
                            integer_part.str.replace_all(r"[^\d+-]", ""),
                            fractional_part,
                            separator=".",
                        )
                    )
                    .cast(String)
                    .cast(tp)
                )
            elif tp != df.schema[c]:
                # fallback: plain cast for any other dtype mismatch
                cast_cols[c] = F.col(c).cast(tp)

    return df.with_columns(**cast_cols) if cast_cols else df
415
+
416
+
417
# when building docs (with Sphinx) we need access to the functions
# associated with the namespaces from the class, as we don't have
# an instance; @sphinx_accessor is a @property that allows this.
NS = TypeVar("NS")


class sphinx_accessor(property):
    """Property-like descriptor that also resolves when accessed on the class itself."""

    def __get__(  # type: ignore[override]
        self,
        instance: Any,
        cls: type[NS],
    ) -> NS:
        # when there is no (valid) instance, pass the class so the namespace
        # remains introspectable at docs-build time
        try:
            return self.fget(  # type: ignore[misc]
                instance if isinstance(instance, cls) else cls
            )
        except (AttributeError, ImportError):
            # namespace unavailable (e.g. optional dependency missing);
            # fall back to returning the descriptor itself
            return self  # type: ignore[return-value]
435
+
436
+
437
# non-None when the BUILDING_SPHINX_DOCS environment variable is set
# (presumably exported by the docs build — confirm against the Sphinx config)
BUILDING_SPHINX_DOCS = os.getenv("BUILDING_SPHINX_DOCS")
438
+
439
+
440
class _NoDefault(Enum):
    """Sentinel enum whose single member marks 'no value supplied' for a parameter."""

    # "borrowed" from
    # https://github.com/pandas-dev/pandas/blob/e7859983a814b1823cf26e3b491ae2fa3be47c53/pandas/_libs/lib.pyx#L2736-L2748
    no_default = "NO_DEFAULT"

    def __repr__(self) -> str:
        return "<no_default>"


# the "no_default" sentinel should typically be used when one of the valid parameter
# values is None, as otherwise we cannot determine if the caller has set that value.
no_default = _NoDefault.no_default
NoDefault = Literal[_NoDefault.no_default]
453
+
454
+
455
def find_stacklevel() -> int:
    """
    Find the first place in the stack that is not inside Polars.

    Returns the number of frames between the caller and the first non-Polars
    frame, suitable for passing as `stacklevel` to `warnings.warn`.

    Taken from:
    https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
    """
    # directory containing the installed polars package
    pkg_dir = str(Path(pl.__file__).parent)

    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame = inspect.currentframe()
    n = 0
    try:
        # walk outward, counting frames whose source file lives inside polars
        while frame:
            fname = inspect.getfile(frame)
            if fname.startswith(pkg_dir) or (
                (qualname := getattr(frame.f_code, "co_qualname", None))
                # ignore @singledispatch wrappers
                and qualname.startswith("singledispatch.")
            ):
                frame = frame.f_back
                n += 1
            else:
                break
    finally:
        # https://docs.python.org/3/library/inspect.html
        # > Though the cycle detector will catch these, destruction of the frames
        # > (and local variables) can be made deterministic by removing the cycle
        # > in a 'finally' clause.
        del frame
    return n
486
+
487
+
488
def issue_warning(message: str, category: type[Warning], **kwargs: Any) -> None:
    """
    Issue a warning.

    Parameters
    ----------
    message
        The message associated with the warning.
    category
        The warning category.
    **kwargs
        Additional arguments for `warnings.warn`. Note that the `stacklevel` is
        determined automatically.
    """
    level = find_stacklevel()
    warnings.warn(message=message, category=category, stacklevel=level, **kwargs)
505
+
506
+
507
+ def _get_stack_locals(
508
+ of_type: type | Collection[type] | Callable[[Any], bool] | None = None,
509
+ *,
510
+ named: str | Collection[str] | None = None,
511
+ n_objects: int | None = None,
512
+ n_frames: int | None = None,
513
+ ) -> dict[str, Any]:
514
+ """
515
+ Retrieve f_locals from all (or the last 'n') stack frames from the calling location.
516
+
517
+ Parameters
518
+ ----------
519
+ of_type
520
+ Only return objects of this type; can be a single class, tuple of
521
+ classes, or a callable that returns True/False if the object being
522
+ tested is considered a match.
523
+ n_objects
524
+ If specified, return only the most recent `n` matching objects.
525
+ n_frames
526
+ If specified, look at objects in the last `n` stack frames only.
527
+ named
528
+ If specified, only return objects matching the given name(s).
529
+ """
530
+ objects = {}
531
+ examined_frames = 0
532
+
533
+ if isinstance(named, str):
534
+ named = (named,)
535
+ if n_frames is None:
536
+ n_frames = sys.maxsize
537
+
538
+ if inspect.isfunction(of_type):
539
+ matches_type = of_type
540
+ else:
541
+ if isinstance(of_type, Collection):
542
+ of_type = tuple(of_type)
543
+
544
+ def matches_type(obj: Any) -> bool: # type: ignore[misc]
545
+ return isinstance(obj, of_type) # type: ignore[arg-type]
546
+
547
+ if named is not None:
548
+ if isinstance(named, str):
549
+ named = (named,)
550
+ elif not isinstance(named, set):
551
+ named = set(named)
552
+
553
+ stack_frame = inspect.currentframe()
554
+ stack_frame = getattr(stack_frame, "f_back", None)
555
+ try:
556
+ while stack_frame and examined_frames < n_frames:
557
+ local_items = list(stack_frame.f_locals.items())
558
+ for nm, obj in reversed(local_items):
559
+ if (
560
+ nm not in objects
561
+ and (named is None or nm in named)
562
+ and (of_type is None or matches_type(obj))
563
+ ):
564
+ objects[nm] = obj
565
+ if n_objects is not None and len(objects) >= n_objects:
566
+ return objects
567
+
568
+ stack_frame = stack_frame.f_back
569
+ examined_frames += 1
570
+ finally:
571
+ # https://docs.python.org/3/library/inspect.html
572
+ # > Though the cycle detector will catch these, destruction of the frames
573
+ # > (and local variables) can be made deterministic by removing the cycle
574
+ # > in a finally clause.
575
+ del stack_frame
576
+
577
+ return objects
578
+
579
+
580
# this is called from rust
def _polars_warn(msg: str, category: type[Warning] = UserWarning) -> None:
    """Emit a Python warning (entry point used by the Rust side)."""
    warnings.warn(msg, category=category, stacklevel=find_stacklevel())
587
+
588
+
589
def extend_bool(
    value: bool | Sequence[bool],
    n_match: int,
    value_name: str,
    match_name: str,
) -> Sequence[bool]:
    """Ensure the given bool or sequence of bools is the correct length."""
    if isinstance(value, bool):
        # broadcast a scalar bool to the required length
        values: Sequence[bool] = [value] * n_match
    else:
        values = value

    if len(values) != n_match:
        msg = (
            f"the length of `{value_name}` ({len(values)}) "
            f"does not match the length of `{match_name}` ({n_match})"
        )
        raise ValueError(msg)
    return values
604
+
605
+
606
def in_terminal_that_supports_colour() -> bool:
    """
    Determine (within reason) if we are in an interactive terminal that supports color.

    Note: this is not exhaustive, but it covers a lot (most?) of the common cases.
    """
    if not hasattr(sys.stdout, "isatty"):
        return False
    # PyCharm's hosted console supports colour even though it is not a tty
    if os.environ.get("PYCHARM_HOSTED") == "1":
        return True
    if not sys.stdout.isatty():
        return False
    # on Windows, require a known colour-capable terminal/session marker
    return (
        sys.platform != "win32"
        or "ANSICON" in os.environ
        or "WT_SESSION" in os.environ
        or os.environ.get("TERM_PROGRAM") == "vscode"
        or os.environ.get("TERM") == "xterm-256color"
    )
625
+
626
+
627
def parse_percentiles(
    percentiles: Sequence[float] | float | None, *, inject_median: bool = False
) -> Sequence[float]:
    """
    Transforms raw percentiles into our preferred format, adding the 50th percentile.

    Raises a ValueError if the percentile sequence is invalid
    (e.g. outside the range [0, 1])
    """
    if percentiles is None:
        percentiles = []
    elif isinstance(percentiles, float):
        percentiles = [percentiles]

    if any(not (0 <= p <= 1) for p in percentiles):
        msg = "`percentiles` must all be in the range [0, 1]"
        raise ValueError(msg)

    # split around the median so it can be injected in the right position
    below_median = sorted(p for p in percentiles if p < 0.5)
    median_and_above = sorted(p for p in percentiles if p >= 0.5)

    if inject_median and (not median_and_above or median_and_above[0] != 0.5):
        median_and_above = [0.5, *median_and_above]

    return [*below_median, *median_and_above]
653
+
654
+
655
def re_escape(s: str) -> str:
    """Escape a string for use in a Polars (Rust) regex."""
    # note: almost the same as the standard python 're.escape' function, but
    # escapes _only_ those metachars with meaning to the rust regex crate
    pattern = "([" + r"\\?()|\[\]{}^$#&~.+*-" + "])"
    return re.sub(pattern, r"\\\1", s)
661
+
662
+
663
# Don't rename or move. This is used by polars cloud
def display_dot_graph(
    *,
    dot: str,
    show: bool = True,
    output_path: str | Path | None = None,
    raw_output: bool = False,
    figsize: tuple[float, float] = (16.0, 12.0),
) -> str | None:
    """
    Render a Graphviz `dot` string, optionally displaying and/or saving it.

    Parameters
    ----------
    dot
        Graph description in Graphviz DOT syntax.
    show
        Display the rendered graph (notebook inline, external viewer, or matplotlib).
    output_path
        If given, also write the rendered bytes to this path.
    raw_output
        Return the raw DOT string without rendering anything.
    figsize
        Figure size used for the matplotlib fallback display.

    Raises
    ------
    ImportError
        If the graphviz `dot` binary is not available on the PATH.
    """
    if raw_output:
        # we do not show a graph, nor save a graph to disk
        return dot

    # notebooks and external SVG viewers get SVG; matplotlib fallback gets PNG
    output_type = (
        "svg"
        if _in_notebook()
        or _in_marimo_notebook()
        or "POLARS_DOT_SVG_VIEWER" in os.environ
        else "png"
    )

    try:
        graph = subprocess.check_output(
            ["dot", "-Nshape=box", "-T" + output_type], input=f"{dot}".encode()
        )
    except (ImportError, FileNotFoundError):
        msg = (
            "the graphviz `dot` binary should be on your PATH."
            "(If not installed you can download here: https://graphviz.org/download/)"
        )
        raise ImportError(msg) from None

    if output_path:
        Path(output_path).write_bytes(graph)

    if not show:
        return None

    if _in_notebook():
        from IPython.display import SVG, display

        return display(SVG(graph))
    elif _in_marimo_notebook():
        import marimo as mo

        return mo.Html(f"{graph.decode()}")
    else:
        # user-configured external SVG viewer ('%file%' is replaced by the temp path)
        if (cmd := os.environ.get("POLARS_DOT_SVG_VIEWER", None)) is not None:
            import tempfile

            with tempfile.NamedTemporaryFile(suffix=".svg") as file:
                file.write(graph)
                file.flush()
                cmd = cmd.replace("%file%", file.name)
                subprocess.run(cmd, shell=True)
            return None

        # last resort: display the rendered PNG via matplotlib
        import_optional(
            "matplotlib",
            err_prefix="",
            err_suffix="should be installed to show graphs",
        )
        import matplotlib.image as mpimg
        import matplotlib.pyplot as plt

        plt.figure(figsize=figsize)
        img = mpimg.imread(BytesIO(graph))
        plt.axis("off")
        plt.imshow(img)
        plt.show()
        return None
734
+
735
+
736
def qualified_type_name(obj: Any, *, qualify_polars: bool = False) -> str:
    """
    Return the module-qualified name of the given object as a string.

    Parameters
    ----------
    obj
        The object to get the qualified name for.
    qualify_polars
        If False (default), omit the module path for our own (Polars) objects.
    """
    cls = obj if isinstance(obj, type) else obj.__class__
    module, name = cls.__module__, cls.__name__

    # builtins (and, by default, polars' own classes) are reported unqualified
    omit_module = (
        not module
        or module == "builtins"
        or (not qualify_polars and module.startswith("polars."))
    )
    return name if omit_module else f"{module}.{name}"
762
+
763
+
764
def require_same_type(current: Any, other: Any) -> None:
    """
    Raise an error if the two arguments are not of the same type.

    The check will not raise an error if one object is of a subclass of the other.

    Parameters
    ----------
    current
        The object the type of which is being checked against.
    other
        An object that has to be of the same type.
    """
    if isinstance(other, type(current)) or isinstance(current, type(other)):
        return
    msg = (
        f"expected `other` to be a {qualified_type_name(current)!r}, "
        f"not {qualified_type_name(other)!r}"
    )
    raise TypeError(msg)