polars-runtime-compat 1.34.0b2__cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203) hide show
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ if TYPE_CHECKING:
6
+ from polars._typing import TimeUnit
7
+
8
# Default number of rows scanned when inferring datatypes.
N_INFER_DEFAULT = 100

# Time-unit strings ("ns", "us", "ms") recognised for temporal dtypes.
DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(("ns", "us", "ms"))
@@ -0,0 +1,172 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ from decimal import Decimal as PyDecimal
5
+ from typing import TYPE_CHECKING, Any, Callable
6
+
7
+ from polars import datatypes as dt
8
+ from polars._dependencies import numpy as np
9
+
10
+ # Module not available when building docs
11
+ try:
12
+ from polars._plr import PySeries
13
+
14
+ _DOCUMENTING = False
15
+ except ImportError:
16
+ _DOCUMENTING = True
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Sequence
20
+
21
+ from polars._typing import PolarsDataType
22
+
23
if not _DOCUMENTING:
    # Lookup table: Polars base dtype -> PySeries constructor.
    # Only defined when `polars._plr` imported successfully (i.e. not while
    # building the docs), because every value is an attribute of PySeries.
    _POLARS_TYPE_TO_CONSTRUCTOR: dict[
        PolarsDataType, Callable[[str, Sequence[Any], bool], PySeries]
    ] = {
        # floating point
        dt.Float32: PySeries.new_opt_f32,
        dt.Float64: PySeries.new_opt_f64,
        # signed integers
        dt.Int8: PySeries.new_opt_i8,
        dt.Int16: PySeries.new_opt_i16,
        dt.Int32: PySeries.new_opt_i32,
        dt.Int64: PySeries.new_opt_i64,
        dt.Int128: PySeries.new_opt_i128,
        # unsigned integers
        dt.UInt8: PySeries.new_opt_u8,
        dt.UInt16: PySeries.new_opt_u16,
        dt.UInt32: PySeries.new_opt_u32,
        dt.UInt64: PySeries.new_opt_u64,
        dt.UInt128: PySeries.new_opt_u128,
        dt.Decimal: PySeries.new_decimal,
        # temporal dtypes reuse the i32/i64 integer constructors
        dt.Date: PySeries.new_opt_i32,
        dt.Datetime: PySeries.new_opt_i64,
        dt.Duration: PySeries.new_opt_i64,
        dt.Time: PySeries.new_opt_i64,
        dt.Boolean: PySeries.new_opt_bool,
        dt.String: PySeries.new_str,
        dt.Object: PySeries.new_object,
        # Categorical and Enum are constructed through the string constructor
        dt.Categorical: PySeries.new_str,
        dt.Enum: PySeries.new_str,
        dt.Binary: PySeries.new_binary,
        dt.Null: PySeries.new_null,
    }
52
+
53
+
54
def polars_type_to_constructor(
    dtype: PolarsDataType,
) -> Callable[[str, Sequence[Any], bool], PySeries]:
    """
    Return the PySeries constructor that matches a Polars dtype.

    Parameters
    ----------
    dtype
        Polars dtype (class or instance) to look up.

    Raises
    ------
    ValueError
        If no constructor is registered for the dtype's base type.
    """
    # Array is handled separately: its constructor needs the dtype itself,
    # so it is bound here as a keyword argument.
    if isinstance(dtype, dt.Array):
        return functools.partial(PySeries.new_array, dtype=dtype)

    try:
        constructor = _POLARS_TYPE_TO_CONSTRUCTOR[dtype.base_type()]
    except KeyError:  # pragma: no cover
        msg = f"cannot construct PySeries for type {dtype!r}"
        raise ValueError(msg) from None
    else:
        return constructor
68
+
69
+
70
# NumPy scalar type -> PySeries constructor; stays None until
# `_set_numpy_to_constructor` fills it in.
_NUMPY_TYPE_TO_CONSTRUCTOR = None


def _set_numpy_to_constructor() -> None:
    """Build the NumPy-type lookup table and store it in the module global."""
    global _NUMPY_TYPE_TO_CONSTRUCTOR
    constructors = {
        np.float32: PySeries.new_f32,
        np.float64: PySeries.new_f64,
        np.int8: PySeries.new_i8,
        np.int16: PySeries.new_i16,
        np.int32: PySeries.new_i32,
        np.int64: PySeries.new_i64,
        np.uint8: PySeries.new_u8,
        np.uint16: PySeries.new_u16,
        np.uint32: PySeries.new_u32,
        np.uint64: PySeries.new_u64,
        np.str_: PySeries.new_str,
        np.bytes_: PySeries.new_binary,
        np.bool_: PySeries.new_bool,
        # datetime64/timedelta64 go through the i64 constructor
        np.datetime64: PySeries.new_i64,
        np.timedelta64: PySeries.new_i64,
    }
    _NUMPY_TYPE_TO_CONSTRUCTOR = constructors
92
+
93
+
94
+ @functools.lru_cache(maxsize=32)
95
+ def _normalise_numpy_dtype(dtype: Any) -> tuple[Any, Any]:
96
+ normalised_dtype = (
97
+ np.dtype(dtype.base.name) if dtype.kind in ("i", "u", "f") else dtype
98
+ ).type
99
+ cast_as: Any = None
100
+ if normalised_dtype == np.float16:
101
+ normalised_dtype = cast_as = np.float32
102
+ elif normalised_dtype in (np.datetime64, np.timedelta64):
103
+ time_unit = np.datetime_data(dtype)[0]
104
+ if time_unit in dt.DTYPE_TEMPORAL_UNITS or (
105
+ time_unit == "D" and normalised_dtype == np.datetime64
106
+ ):
107
+ cast_as = np.int64
108
+ else:
109
+ msg = (
110
+ "incorrect NumPy datetime resolution"
111
+ "\n\n'D' (datetime only), 'ms', 'us', and 'ns' resolutions are supported when converting from numpy.{datetime64,timedelta64}."
112
+ " Please cast to the closest supported unit before converting."
113
+ )
114
+ raise ValueError(msg)
115
+ return normalised_dtype, cast_as
116
+
117
+
118
def numpy_values_and_dtype(
    values: np.ndarray[Any, Any],
) -> tuple[np.ndarray[Any, Any], type]:
    """
    Return the array (cast if necessary) together with its normalised dtype.

    The dtype is normalised via `_normalise_numpy_dtype`, which rebuilds
    integer/float dtypes from their base name so that architecture specific
    dtypes (np.longlong, np.ulonglong, np.intc, np.uintc, np.longdouble, ...)
    become their normalized dtype (np.int*, np.uint*, np.float*).
    """
    normalised, cast_target = _normalise_numpy_dtype(values.dtype)
    if not cast_target:
        return values, normalised
    return values.astype(cast_target), normalised
129
+
130
+
131
def numpy_type_to_constructor(
    values: np.ndarray[Any, Any], dtype: type[np.dtype[Any]]
) -> Callable[..., PySeries]:
    """
    Get the right PySeries constructor for the given NumPy dtype.

    Parameters
    ----------
    values
        The array being converted; only inspected when `dtype` has no
        registered constructor.
    dtype
        NumPy scalar type used as the lookup key.
    """
    # The lookup table is created on first use.
    if _NUMPY_TYPE_TO_CONSTRUCTOR is None:
        _set_numpy_to_constructor()

    try:
        return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype]  # type:ignore[index]
    except KeyError:
        if len(values) > 0:
            # Find the first element that is an ndarray or equals itself
            # (NaN != NaN, so NaN entries are skipped).
            sample = None
            for candidate in values:
                if isinstance(candidate, np.ndarray) or candidate == candidate:
                    sample = candidate
                    break
            if isinstance(sample, str):
                return PySeries.new_str
            if isinstance(sample, bytes):
                return PySeries.new_binary
        return PySeries.new_object
    except NameError:  # pragma: no cover
        msg = f"'numpy' is required to convert numpy dtype {dtype!r}"
        raise ModuleNotFoundError(msg) from None
152
+
153
+
154
if not _DOCUMENTING:
    # Python builtin type -> PySeries constructor.
    # Insertion order matters to `py_type_to_constructor`, which scans the
    # keys in order when looking for a matching base class.
    _PY_TYPE_TO_CONSTRUCTOR = {
        float: PySeries.new_opt_f64,
        bool: PySeries.new_opt_bool,
        int: PySeries.new_opt_i64,
        str: PySeries.new_str,
        bytes: PySeries.new_binary,
        PyDecimal: PySeries.new_decimal,
    }
163
+
164
+
165
def py_type_to_constructor(py_type: type[Any]) -> Callable[..., PySeries]:
    """
    Get the right PySeries constructor for the given Python type.

    An exact match in the lookup table wins; otherwise the first registered
    type that `py_type` subclasses is used; otherwise the object constructor.
    """
    if py_type in _PY_TYPE_TO_CONSTRUCTOR:
        return _PY_TYPE_TO_CONSTRUCTOR[py_type]

    for registered_type, constructor in _PY_TYPE_TO_CONSTRUCTOR.items():
        if issubclass(py_type, registered_type):
            return constructor

    return PySeries.new_object
@@ -0,0 +1,366 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import functools
5
+ import re
6
+ import sys
7
+ from collections.abc import Collection
8
+ from datetime import date, datetime, time, timedelta
9
+ from decimal import Decimal as PyDecimal
10
+ from typing import TYPE_CHECKING, Any, Optional, Union
11
+
12
+ from polars._dependencies import numpy as np
13
+ from polars._dependencies import pyarrow as pa
14
+ from polars.datatypes.classes import (
15
+ Array,
16
+ Binary,
17
+ Boolean,
18
+ Categorical,
19
+ DataType,
20
+ DataTypeClass,
21
+ Date,
22
+ Datetime,
23
+ Decimal,
24
+ Duration,
25
+ Enum,
26
+ Field,
27
+ Float32,
28
+ Float64,
29
+ Int8,
30
+ Int16,
31
+ Int32,
32
+ Int64,
33
+ Int128,
34
+ List,
35
+ Null,
36
+ Object,
37
+ String,
38
+ Struct,
39
+ Time,
40
+ UInt8,
41
+ UInt16,
42
+ UInt32,
43
+ UInt64,
44
+ UInt128,
45
+ Unknown,
46
+ )
47
+
48
+ with contextlib.suppress(ImportError): # Module not available when building docs
49
+ from polars._plr import dtype_str_repr as _dtype_str_repr
50
+
51
+
52
# Runtime class of an `Optional[...]` annotation object.
OptionType = type(Optional[type])

if sys.version_info < (3, 10):
    # `types.NoneType` / `types.UnionType` only exist from Python 3.10;
    # derive equivalent classes on older interpreters.
    NoneType = type(None)
    UnionType = type(Union[int, float])
else:
    from types import NoneType, UnionType
59
+
60
+ if TYPE_CHECKING:
61
+ from polars._typing import PolarsDataType, PythonDataType, TimeUnit
62
+
63
+ if sys.version_info >= (3, 10):
64
+ from typing import TypeGuard
65
+ else:
66
+ from typing_extensions import TypeGuard
67
+
68
+
69
def is_polars_dtype(
    dtype: Any,
    *,
    include_unknown: bool = False,
    require_instantiated: bool = False,
) -> TypeGuard[PolarsDataType]:
    """
    Indicate whether the given input is a Polars dtype, or dtype specialization.

    Parameters
    ----------
    dtype
        Object to test.
    include_unknown
        If False (the default), a dtype equal to `Unknown` is rejected.
    require_instantiated
        If True, only `DataType` instances are accepted; otherwise dtype
        classes (`DataTypeClass`) are accepted as well.
    """
    accepted = DataType if require_instantiated else (DataType, DataTypeClass)
    if not isinstance(dtype, accepted):
        return False
    if include_unknown:
        return True
    return dtype != Unknown
83
+
84
+
85
def unpack_dtypes(
    *dtypes: PolarsDataType | None,
    include_compound: bool = False,
) -> set[PolarsDataType]:
    """
    Return a set of unique dtypes found in one or more (potentially compound) dtypes.

    Parameters
    ----------
    *dtypes
        One or more Polars dtypes. A single collection of dtypes is also
        accepted and is unpacked as if its members had been passed directly.
    include_compound
        * if True, any parent/compound dtypes (List, Array, Struct) are included
          in the result.
        * if False, only the child/scalar dtypes are returned from these types.

    Examples
    --------
    >>> from polars.datatypes import unpack_dtypes
    >>> list_dtype = pl.List(pl.Float64)
    >>> struct_dtype = pl.Struct(
    ...     [
    ...         pl.Field("a", pl.Int64),
    ...         pl.Field("b", pl.String),
    ...         pl.Field("c", pl.List(pl.Float64)),
    ...     ]
    ... )
    >>> unpack_dtypes([struct_dtype, list_dtype])  # doctest: +IGNORE_RESULT
    {Float64, Int64, String}
    >>> unpack_dtypes(
    ...     [struct_dtype, list_dtype], include_compound=True
    ... )  # doctest: +IGNORE_RESULT
    {Float64, Int64, String, List(Float64), Struct([Field('a', Int64), Field('b', String), Field('c', List(Float64))])}
    """  # noqa: W505
    if not dtypes:
        return set()
    if len(dtypes) == 1 and isinstance(dtypes[0], Collection):
        # a single collection was passed: iterate its members instead
        dtypes = dtypes[0]

    found: set[PolarsDataType] = set()
    for dtype in dtypes:
        if isinstance(dtype, (List, Array, Struct)):
            if include_compound:
                found.add(dtype)
            # Struct exposes its children as `fields`, List/Array as `inner`
            children = dtype.fields if isinstance(dtype, Struct) else dtype.inner
            found |= unpack_dtypes(children, include_compound=include_compound)  # type: ignore[arg-type]
        elif isinstance(dtype, Field):
            found |= unpack_dtypes(dtype.dtype, include_compound=include_compound)
        elif dtype is not None and is_polars_dtype(dtype):
            found.add(dtype)
    return found
138
+
139
+
140
class _DataTypeMappings:
    """
    Lookup tables relating Polars dtypes to other type systems.

    Each table is exposed as a read-only property whose result is memoised
    with `functools.lru_cache`, so a table is built on first access and the
    same dict is returned afterwards. The class is instantiated once below
    (`DataTypeMappings`), which is why caching on `self` is acceptable here.
    """

    @property
    @functools.lru_cache  # noqa: B019
    def DTYPE_TO_FFINAME(self) -> dict[PolarsDataType, str]:
        """Polars dtype -> FFI name string."""
        return {
            Binary: "binary",
            Boolean: "bool",
            Categorical: "categorical",
            Date: "date",
            Datetime: "datetime",
            Decimal: "decimal",
            Duration: "duration",
            Float32: "f32",
            Float64: "f64",
            Int8: "i8",
            Int16: "i16",
            Int32: "i32",
            Int64: "i64",
            Int128: "i128",
            List: "list",
            Object: "object",
            String: "str",
            Struct: "struct",
            Time: "time",
            UInt8: "u8",
            UInt16: "u16",
            UInt32: "u32",
            UInt64: "u64",
            UInt128: "u128",
        }

    @property
    @functools.lru_cache  # noqa: B019
    def DTYPE_TO_PY_TYPE(self) -> dict[PolarsDataType, PythonDataType]:
        """Polars dtype -> Python type."""
        return {
            Array: list,
            Binary: bytes,
            Boolean: bool,
            Date: date,
            Datetime: datetime,
            Decimal: PyDecimal,
            Duration: timedelta,
            Float32: float,
            Float64: float,
            Int8: int,
            Int16: int,
            Int32: int,
            Int64: int,
            Int128: int,
            List: list,
            Null: None.__class__,
            Object: object,
            String: str,
            Struct: dict,
            Time: time,
            UInt8: int,
            UInt16: int,
            UInt32: int,
            UInt64: int,
            UInt128: int,
            # the below mappings are appropriate as we restrict cat/enum to strings
            Enum: str,
            Categorical: str,
        }

    @property
    @functools.lru_cache  # noqa: B019
    def NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE(self) -> dict[tuple[str, int], PolarsDataType]:
        """(numpy dtype kind, numpy dtype itemsize) -> Polars dtype."""
        return {
            # (np.dtype().kind, np.dtype().itemsize)
            ("M", 8): Datetime,
            ("b", 1): Boolean,
            # 2-byte floats map to Float32
            ("f", 2): Float32,
            ("f", 4): Float32,
            ("f", 8): Float64,
            ("i", 1): Int8,
            ("i", 2): Int16,
            ("i", 4): Int32,
            ("i", 8): Int64,
            ("m", 8): Duration,
            ("u", 1): UInt8,
            ("u", 2): UInt16,
            ("u", 4): UInt32,
            ("u", 8): UInt64,
        }

    @property
    @functools.lru_cache  # noqa: B019
    def PY_TYPE_TO_ARROW_TYPE(self) -> dict[PythonDataType, pa.lib.DataType]:
        """Python type -> pyarrow data type instance."""
        return {
            bool: pa.bool_(),
            date: pa.date32(),
            datetime: pa.timestamp("us"),
            float: pa.float64(),
            int: pa.int64(),
            str: pa.large_utf8(),
            time: pa.time64("us"),
            timedelta: pa.duration("us"),
            None.__class__: pa.null(),
        }

    @property
    @functools.lru_cache  # noqa: B019
    def REPR_TO_DTYPE(self) -> dict[str, PolarsDataType]:
        """Base dtype repr string (text before any ``[``) -> Polars dtype."""

        def _base_repr(obj: Any) -> str | None:
            # Repr of the object's base type with any "[...]" suffix removed;
            # None when the repr function rejects the object with TypeError.
            try:
                return _dtype_str_repr(obj.base_type()).split("[")[0]
            except TypeError:
                return None

        mapping: dict[str, PolarsDataType] = {}
        # Scan this module's globals for dtype objects. A snapshot is taken
        # so the scan cannot be disturbed by changes to the namespace.
        for obj in tuple(globals().values()):
            if not is_polars_dtype(obj):
                continue
            # compute the repr once per candidate
            name = _base_repr(obj)
            if name is not None:
                mapping[name] = obj
        return mapping


# Initialize once (poor man's singleton :)
DataTypeMappings = _DataTypeMappings()
259
+
260
+
261
def dtype_to_ffiname(dtype: PolarsDataType) -> str:
    """
    Return FFI function name associated with the given Polars dtype.

    Raises
    ------
    NotImplementedError
        If the dtype's base type has no entry in the FFI name table.
    """
    # `reported` tracks what the error message shows: the base type once it
    # has been resolved, otherwise the dtype as passed in.
    reported = dtype
    try:
        reported = dtype.base_type()
        ffi_name = DataTypeMappings.DTYPE_TO_FFINAME[reported]
    except KeyError:  # pragma: no cover
        msg = f"conversion of polars data type {reported!r} to FFI not implemented"
        raise NotImplementedError(msg) from None
    return ffi_name
269
+
270
+
271
def dtype_to_py_type(dtype: PolarsDataType) -> PythonDataType:
    """
    Convert a Polars dtype to a Python dtype.

    Raises
    ------
    NotImplementedError
        If the dtype's base type has no entry in the Python type table.
    """
    # `reported` tracks what the error message shows: the base type once it
    # has been resolved, otherwise the dtype as passed in.
    reported = dtype
    try:
        reported = dtype.base_type()
        py_type = DataTypeMappings.DTYPE_TO_PY_TYPE[reported]
    except KeyError:  # pragma: no cover
        msg = f"conversion of polars data type {reported!r} to Python type not implemented"
        raise NotImplementedError(msg) from None
    return py_type
279
+
280
+
281
def py_type_to_arrow_type(dtype: PythonDataType) -> pa.lib.DataType:
    """
    Convert a Python dtype to an Arrow dtype.

    Raises
    ------
    ValueError
        If the Python type has no entry in the Arrow type table.
    """
    try:
        arrow_type = DataTypeMappings.PY_TYPE_TO_ARROW_TYPE[dtype]
    except KeyError:  # pragma: no cover
        msg = f"cannot parse Python data type {dtype!r} into Arrow data type"
        raise ValueError(msg) from None
    return arrow_type
288
+
289
+
290
def dtype_short_repr_to_dtype(dtype_string: str | None) -> PolarsDataType | None:
    """
    Map a PolarsDataType short repr (eg: 'i64', 'list[str]') back into a dtype.

    Returns None when the input is None or does not have the expected
    ``name`` / ``name[params]`` shape. When the bracketed parameters cannot
    be applied (ValueError), the bare dtype is returned instead.
    """
    if dtype_string is None:
        return None

    parsed = re.match(r"^(\w+)(?:\[(.+)\])?$", dtype_string)
    if parsed is None:
        return None

    base_name, params = parsed.groups()
    dtype = DataTypeMappings.REPR_TO_DTYPE.get(base_name)
    if not (dtype and params):
        return dtype

    # TODO: further-improve handling for nested types (such as List,Struct)
    try:
        if dtype == Decimal:
            args = (None, int(params))
        else:
            # comma-separated params; surrounding quotes/spaces are stripped
            # and "μs" is normalised to "us"
            args = [
                part.strip("'\" ") for part in params.replace("μs", "us").split(",")
            ]
        return dtype(*args)  # type: ignore[operator]
    except ValueError:
        return dtype
314
+
315
+
316
def supported_numpy_char_code(dtype_char: str) -> bool:
    """Check if the input can be mapped to a Polars dtype."""
    np_dtype = np.dtype(dtype_char)
    lookup_key = (np_dtype.kind, np_dtype.itemsize)
    return lookup_key in DataTypeMappings.NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE
323
+
324
+
325
def numpy_char_code_to_dtype(dtype_char: str) -> PolarsDataType:
    """
    Convert a numpy character dtype to a Polars dtype.

    Raises
    ------
    ValueError
        If the numpy dtype's (kind, itemsize) pair has no Polars equivalent.
    """
    dtype = np.dtype(dtype_char)
    kind = dtype.kind

    # Unicode and byte-string kinds are matched on kind alone
    if kind == "U":
        return String
    if kind == "S":
        return Binary

    try:
        polars_dtype = DataTypeMappings.NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE[
            kind, dtype.itemsize
        ]
    except KeyError:  # pragma: no cover
        msg = f"cannot parse numpy data type {dtype!r} into Polars data type"
        raise ValueError(msg) from None
    return polars_dtype
339
+
340
+
341
def maybe_cast(el: Any, dtype: PolarsDataType) -> Any:
    """
    Try casting a value to a value that is valid for the given Polars dtype.

    `datetime` and `timedelta` values are passed through `datetime_to_int` /
    `timedelta_to_int` using the dtype's `time_unit` (falling back to "us").
    Any other value is returned unchanged if it already is an instance of the
    dtype's Python type, and is otherwise passed to that type's constructor.

    Raises
    ------
    TypeError
        If constructing the Python type from the value fails.
    """
    from polars._utils.convert import (
        datetime_to_int,
        timedelta_to_int,
    )

    if isinstance(el, datetime):
        return datetime_to_int(el, getattr(dtype, "time_unit", "us"))
    if isinstance(el, timedelta):
        return timedelta_to_int(el, getattr(dtype, "time_unit", "us"))

    target_type = dtype_to_py_type(dtype)
    if isinstance(el, target_type):
        return el

    try:
        return target_type(el)  # type: ignore[call-arg]
    except Exception:
        from polars._utils.various import qualified_type_name

        msg = f"cannot convert Python type {qualified_type_name(el)!r} to {dtype!r}"
        raise TypeError(msg) from None
@@ -0,0 +1,130 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from polars.datatypes.classes import (
6
+ Array,
7
+ DataType,
8
+ DataTypeClass,
9
+ Date,
10
+ Datetime,
11
+ Decimal,
12
+ Duration,
13
+ Float32,
14
+ Float64,
15
+ Int8,
16
+ Int16,
17
+ Int32,
18
+ Int64,
19
+ Int128,
20
+ List,
21
+ Struct,
22
+ Time,
23
+ UInt8,
24
+ UInt16,
25
+ UInt32,
26
+ UInt64,
27
+ UInt128,
28
+ )
29
+
30
+ if TYPE_CHECKING:
31
+ import sys
32
+ from collections.abc import Iterable
33
+
34
+ from polars._typing import (
35
+ PolarsDataType,
36
+ PolarsIntegerType,
37
+ PolarsTemporalType,
38
+ )
39
+
40
+ if sys.version_info >= (3, 11):
41
+ from typing import Self
42
+ else:
43
+ from typing_extensions import Self
44
+
45
+
46
class DataTypeGroup(frozenset):  # type: ignore[type-arg]
    """
    Group of data types.

    A frozenset of dtypes whose membership test can optionally compare the
    *base type* of the candidate, so that a parameterised dtype matches its
    unparameterised entry in the group.
    """

    # whether `__contains__` reduces a dtype to its base type before lookup
    _match_base_type: bool

    def __new__(
        cls, items: Iterable[DataType | DataTypeClass], *, match_base_type: bool = True
    ) -> Self:
        """
        Construct a DataTypeGroup.

        Parameters
        ----------
        items :
            iterable of data types
        match_base_type:
            match the base type

        Raises
        ------
        TypeError
            If any item is neither a `DataType` nor a `DataTypeClass`.
        """
        # Materialise first: `items` is iterated for validation and again to
        # build the set, so a single-pass iterable (e.g. a generator) would
        # otherwise be exhausted by validation and yield an empty group.
        members = tuple(items)
        for member in members:
            if not isinstance(member, (DataType, DataTypeClass)):
                from polars._utils.various import qualified_type_name

                msg = f"DataTypeGroup items must be dtypes; found {qualified_type_name(member)!r}"
                raise TypeError(msg)

        dtype_group = super().__new__(cls, members)
        dtype_group._match_base_type = match_base_type
        return dtype_group

    def __contains__(self, item: Any) -> bool:
        """Test membership, comparing the item's base type when enabled."""
        if self._match_base_type and isinstance(item, (DataType, DataTypeClass)):
            item = item.base_type()
        return super().__contains__(item)
79
+
80
+
81
# --- integer groups ---
SIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
    (Int8, Int16, Int32, Int64, Int128)
)
UNSIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
    (UInt8, UInt16, UInt32, UInt64, UInt128)
)
INTEGER_DTYPES: frozenset[PolarsIntegerType] = (
    SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
)

# --- float / numeric groups ---
FLOAT_DTYPES: frozenset[PolarsDataType] = DataTypeGroup((Float32, Float64))
NUMERIC_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
    FLOAT_DTYPES | INTEGER_DTYPES | frozenset((Decimal,))
)

# --- temporal groups ---
# Each group lists the bare dtype class alongside its per-time-unit instances;
# the Datetime group additionally lists the "*" time-zone variants.
DATETIME_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
    (
        Datetime,
        Datetime("ms"),
        Datetime("us"),
        Datetime("ns"),
        Datetime("ms", "*"),
        Datetime("us", "*"),
        Datetime("ns", "*"),
    )
)
DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
    (
        Duration,
        Duration("ms"),
        Duration("us"),
        Duration("ns"),
    )
)
TEMPORAL_DTYPES: frozenset[PolarsTemporalType] = DataTypeGroup(
    frozenset((Date, Time)) | DATETIME_DTYPES | DURATION_DTYPES
)

# --- nested group ---
NESTED_DTYPES: frozenset[PolarsDataType] = DataTypeGroup((List, Struct, Array))