polars-runtime-compat 1.34.0b3-cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b5-cp39-abi3-manylinux_2_24_aarch64.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release: this version of polars-runtime-compat might be problematic.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/datatypes/constants.py
DELETED
@@ -1,11 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from polars._typing import TimeUnit
-
-# Number of rows to scan by default when inferring datatypes
-N_INFER_DEFAULT = 100
-
-DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(["ns", "us", "ms"])
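For orientation only: a minimal, self-contained sketch of how defaults like these are typically consumed (bounded-row schema inference and time-unit validation). The helper names and signatures below are hypothetical illustrations; only the two constant values come from the deleted file above.

```python
from __future__ import annotations

# Values mirrored from the deleted constants.py above.
N_INFER_DEFAULT = 100
DTYPE_TEMPORAL_UNITS = frozenset(["ns", "us", "ms"])


def infer_schema(
    rows: list[dict], infer_schema_length: int | None = N_INFER_DEFAULT
) -> dict[str, type]:
    """Hypothetical reader helper: inspect at most `infer_schema_length` rows."""
    sample = rows if infer_schema_length is None else rows[:infer_schema_length]
    schema: dict[str, type] = {}
    for row in sample:
        for key, value in row.items():
            schema.setdefault(key, type(value))
    return schema


def check_time_unit(unit: str) -> str:
    """Hypothetical validation against the supported temporal units."""
    if unit not in DTYPE_TEMPORAL_UNITS:
        msg = f"unsupported time unit {unit!r}; expected one of {sorted(DTYPE_TEMPORAL_UNITS)}"
        raise ValueError(msg)
    return unit


print(infer_schema([{"a": 1, "b": "x"}, {"a": 2.5}]))  # {'a': <class 'int'>, 'b': <class 'str'>}
print(check_time_unit("us"))                           # 'us'
```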
polars/datatypes/constructor.py
DELETED
@@ -1,172 +0,0 @@
-from __future__ import annotations
-
-import functools
-from decimal import Decimal as PyDecimal
-from typing import TYPE_CHECKING, Any, Callable
-
-from polars import datatypes as dt
-from polars._dependencies import numpy as np
-
-# Module not available when building docs
-try:
-    from polars._plr import PySeries
-
-    _DOCUMENTING = False
-except ImportError:
-    _DOCUMENTING = True
-
-if TYPE_CHECKING:
-    from collections.abc import Sequence
-
-    from polars._typing import PolarsDataType
-
-if not _DOCUMENTING:
-    _POLARS_TYPE_TO_CONSTRUCTOR: dict[
-        PolarsDataType, Callable[[str, Sequence[Any], bool], PySeries]
-    ] = {
-        dt.Float32: PySeries.new_opt_f32,
-        dt.Float64: PySeries.new_opt_f64,
-        dt.Int8: PySeries.new_opt_i8,
-        dt.Int16: PySeries.new_opt_i16,
-        dt.Int32: PySeries.new_opt_i32,
-        dt.Int64: PySeries.new_opt_i64,
-        dt.Int128: PySeries.new_opt_i128,
-        dt.UInt8: PySeries.new_opt_u8,
-        dt.UInt16: PySeries.new_opt_u16,
-        dt.UInt32: PySeries.new_opt_u32,
-        dt.UInt64: PySeries.new_opt_u64,
-        dt.UInt128: PySeries.new_opt_u128,
-        dt.Decimal: PySeries.new_decimal,
-        dt.Date: PySeries.new_opt_i32,
-        dt.Datetime: PySeries.new_opt_i64,
-        dt.Duration: PySeries.new_opt_i64,
-        dt.Time: PySeries.new_opt_i64,
-        dt.Boolean: PySeries.new_opt_bool,
-        dt.String: PySeries.new_str,
-        dt.Object: PySeries.new_object,
-        dt.Categorical: PySeries.new_str,
-        dt.Enum: PySeries.new_str,
-        dt.Binary: PySeries.new_binary,
-        dt.Null: PySeries.new_null,
-    }
-
-
-def polars_type_to_constructor(
-    dtype: PolarsDataType,
-) -> Callable[[str, Sequence[Any], bool], PySeries]:
-    """Get the right PySeries constructor for the given Polars dtype."""
-    # Special case for Array as it needs to pass the dtype argument on construction
-    if isinstance(dtype, dt.Array):
-        return functools.partial(PySeries.new_array, dtype=dtype)
-
-    try:
-        base_type = dtype.base_type()
-        return _POLARS_TYPE_TO_CONSTRUCTOR[base_type]
-    except KeyError:  # pragma: no cover
-        msg = f"cannot construct PySeries for type {dtype!r}"
-        raise ValueError(msg) from None
-
-
-_NUMPY_TYPE_TO_CONSTRUCTOR = None
-
-
-def _set_numpy_to_constructor() -> None:
-    global _NUMPY_TYPE_TO_CONSTRUCTOR
-    _NUMPY_TYPE_TO_CONSTRUCTOR = {
-        np.float32: PySeries.new_f32,
-        np.float64: PySeries.new_f64,
-        np.int8: PySeries.new_i8,
-        np.int16: PySeries.new_i16,
-        np.int32: PySeries.new_i32,
-        np.int64: PySeries.new_i64,
-        np.uint8: PySeries.new_u8,
-        np.uint16: PySeries.new_u16,
-        np.uint32: PySeries.new_u32,
-        np.uint64: PySeries.new_u64,
-        np.str_: PySeries.new_str,
-        np.bytes_: PySeries.new_binary,
-        np.bool_: PySeries.new_bool,
-        np.datetime64: PySeries.new_i64,
-        np.timedelta64: PySeries.new_i64,
-    }
-
-
-@functools.lru_cache(maxsize=32)
-def _normalise_numpy_dtype(dtype: Any) -> tuple[Any, Any]:
-    normalised_dtype = (
-        np.dtype(dtype.base.name) if dtype.kind in ("i", "u", "f") else dtype
-    ).type
-    cast_as: Any = None
-    if normalised_dtype == np.float16:
-        normalised_dtype = cast_as = np.float32
-    elif normalised_dtype in (np.datetime64, np.timedelta64):
-        time_unit = np.datetime_data(dtype)[0]
-        if time_unit in dt.DTYPE_TEMPORAL_UNITS or (
-            time_unit == "D" and normalised_dtype == np.datetime64
-        ):
-            cast_as = np.int64
-        else:
-            msg = (
-                "incorrect NumPy datetime resolution"
-                "\n\n'D' (datetime only), 'ms', 'us', and 'ns' resolutions are supported when converting from numpy.{datetime64,timedelta64}."
-                " Please cast to the closest supported unit before converting."
-            )
-            raise ValueError(msg)
-    return normalised_dtype, cast_as
-
-
-def numpy_values_and_dtype(
-    values: np.ndarray[Any, Any],
-) -> tuple[np.ndarray[Any, Any], type]:
-    """Return numpy values and their associated dtype, adjusting if required."""
-    # Create new dtype object from dtype base name so architecture specific
-    # dtypes (np.longlong np.ulonglong np.intc np.uintc np.longdouble, ...)
-    # get converted to their normalized dtype (np.int*, np.uint*, np.float*).
-    dtype, cast_as = _normalise_numpy_dtype(values.dtype)
-    if cast_as:
-        values = values.astype(cast_as)
-    return values, dtype
-
-
-def numpy_type_to_constructor(
-    values: np.ndarray[Any, Any], dtype: type[np.dtype[Any]]
-) -> Callable[..., PySeries]:
-    """Get the right PySeries constructor for the given Polars dtype."""
-    if _NUMPY_TYPE_TO_CONSTRUCTOR is None:
-        _set_numpy_to_constructor()
-    try:
-        return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype]  # type:ignore[index]
-    except KeyError:
-        if len(values) > 0:
-            first_non_nan = next(
-                (v for v in values if isinstance(v, np.ndarray) or v == v), None
-            )
-            if isinstance(first_non_nan, str):
-                return PySeries.new_str
-            if isinstance(first_non_nan, bytes):
-                return PySeries.new_binary
-        return PySeries.new_object
-    except NameError:  # pragma: no cover
-        msg = f"'numpy' is required to convert numpy dtype {dtype!r}"
-        raise ModuleNotFoundError(msg) from None
-
-
-if not _DOCUMENTING:
-    _PY_TYPE_TO_CONSTRUCTOR = {
-        float: PySeries.new_opt_f64,
-        bool: PySeries.new_opt_bool,
-        int: PySeries.new_opt_i64,
-        str: PySeries.new_str,
-        bytes: PySeries.new_binary,
-        PyDecimal: PySeries.new_decimal,
-    }
-
-
-def py_type_to_constructor(py_type: type[Any]) -> Callable[..., PySeries]:
-    """Get the right PySeries constructor for the given Python dtype."""
-    py_type = (
-        next((tp for tp in _PY_TYPE_TO_CONSTRUCTOR if issubclass(py_type, tp)), py_type)
-        if py_type not in _PY_TYPE_TO_CONSTRUCTOR
-        else py_type
-    )
-    return _PY_TYPE_TO_CONSTRUCTOR.get(py_type, PySeries.new_object)
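The deleted constructor.py is essentially a set of dispatch tables: a Polars, NumPy, or plain-Python type is looked up in a dict of constructor callables, with subclass matching and a generic object constructor as fallbacks. A minimal sketch of that lookup pattern, using stand-in list-building constructors instead of the real PySeries.new_* bindings:

```python
from __future__ import annotations

from typing import Any, Callable, Sequence


# Stand-in constructors; the deleted module dispatches to PySeries.new_* bindings instead.
def new_i64(name: str, values: Sequence[Any], strict: bool) -> list[int]:
    return [int(v) for v in values]


def new_str(name: str, values: Sequence[Any], strict: bool) -> list[str]:
    return [str(v) for v in values]


def new_object(name: str, values: Sequence[Any], strict: bool) -> list[Any]:
    return list(values)


# Dispatch table keyed on the Python type of the incoming values.
_PY_TYPE_TO_CONSTRUCTOR: dict[type, Callable[[str, Sequence[Any], bool], list[Any]]] = {
    int: new_i64,
    str: new_str,
}


def py_type_to_constructor(py_type: type) -> Callable[[str, Sequence[Any], bool], list[Any]]:
    # Exact match first, then subclass match, then the generic object fallback,
    # mirroring the lookup order of the deleted py_type_to_constructor.
    if py_type not in _PY_TYPE_TO_CONSTRUCTOR:
        py_type = next(
            (tp for tp in _PY_TYPE_TO_CONSTRUCTOR if issubclass(py_type, tp)), py_type
        )
    return _PY_TYPE_TO_CONSTRUCTOR.get(py_type, new_object)


print(py_type_to_constructor(bool)("flags", [True, False], True))  # [1, 0] via new_i64
print(py_type_to_constructor(bytes)("blobs", [b"a", b"b"], True))  # [b'a', b'b'] via new_object
```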
polars/datatypes/convert.py
DELETED
@@ -1,366 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-import functools
-import re
-import sys
-from collections.abc import Collection
-from datetime import date, datetime, time, timedelta
-from decimal import Decimal as PyDecimal
-from typing import TYPE_CHECKING, Any, Optional, Union
-
-from polars._dependencies import numpy as np
-from polars._dependencies import pyarrow as pa
-from polars.datatypes.classes import (
-    Array,
-    Binary,
-    Boolean,
-    Categorical,
-    DataType,
-    DataTypeClass,
-    Date,
-    Datetime,
-    Decimal,
-    Duration,
-    Enum,
-    Field,
-    Float32,
-    Float64,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    Int128,
-    List,
-    Null,
-    Object,
-    String,
-    Struct,
-    Time,
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-    UInt128,
-    Unknown,
-)
-
-with contextlib.suppress(ImportError):  # Module not available when building docs
-    from polars._plr import dtype_str_repr as _dtype_str_repr
-
-
-OptionType = type(Optional[type])
-if sys.version_info >= (3, 10):
-    from types import NoneType, UnionType
-else:
-    # infer equivalent class
-    NoneType = type(None)
-    UnionType = type(Union[int, float])
-
-if TYPE_CHECKING:
-    from polars._typing import PolarsDataType, PythonDataType, TimeUnit
-
-    if sys.version_info >= (3, 10):
-        from typing import TypeGuard
-    else:
-        from typing_extensions import TypeGuard
-
-
-def is_polars_dtype(
-    dtype: Any,
-    *,
-    include_unknown: bool = False,
-    require_instantiated: bool = False,
-) -> TypeGuard[PolarsDataType]:
-    """Indicate whether the given input is a Polars dtype, or dtype specialization."""
-    check_classes = DataType if require_instantiated else (DataType, DataTypeClass)
-    is_dtype = isinstance(dtype, check_classes)
-
-    if not include_unknown:
-        return is_dtype and dtype != Unknown
-    else:
-        return is_dtype
-
-
-def unpack_dtypes(
-    *dtypes: PolarsDataType | None,
-    include_compound: bool = False,
-) -> set[PolarsDataType]:
-    """
-    Return a set of unique dtypes found in one or more (potentially compound) dtypes.
-
-    Parameters
-    ----------
-    *dtypes
-        One or more Polars dtypes.
-    include_compound
-        * if True, any parent/compound dtypes (List, Struct) are included in the result.
-        * if False, only the child/scalar dtypes are returned from these types.
-
-    Examples
-    --------
-    >>> from polars.datatypes import unpack_dtypes
-    >>> list_dtype = [pl.List(pl.Float64)]
-    >>> struct_dtype = pl.Struct(
-    ...     [
-    ...         pl.Field("a", pl.Int64),
-    ...         pl.Field("b", pl.String),
-    ...         pl.Field("c", pl.List(pl.Float64)),
-    ...     ]
-    ... )
-    >>> unpack_dtypes([struct_dtype, list_dtype])  # doctest: +IGNORE_RESULT
-    {Float64, Int64, String}
-    >>> unpack_dtypes(
-    ...     [struct_dtype, list_dtype], include_compound=True
-    ... )  # doctest: +IGNORE_RESULT
-    {Float64, Int64, String, List(Float64), Struct([Field('a', Int64), Field('b', String), Field('c', List(Float64))])}
-    """  # noqa: W505
-    if not dtypes:
-        return set()
-    elif len(dtypes) == 1 and isinstance(dtypes[0], Collection):
-        dtypes = dtypes[0]
-
-    unpacked: set[PolarsDataType] = set()
-    for tp in dtypes:
-        if isinstance(tp, (List, Array)):
-            if include_compound:
-                unpacked.add(tp)
-            unpacked.update(unpack_dtypes(tp.inner, include_compound=include_compound))
-        elif isinstance(tp, Struct):
-            if include_compound:
-                unpacked.add(tp)
-            unpacked.update(unpack_dtypes(tp.fields, include_compound=include_compound))  # type: ignore[arg-type]
-        elif isinstance(tp, Field):
-            unpacked.update(unpack_dtypes(tp.dtype, include_compound=include_compound))
-        elif tp is not None and is_polars_dtype(tp):
-            unpacked.add(tp)
-    return unpacked
-
-
-class _DataTypeMappings:
-    @property
-    @functools.lru_cache  # noqa: B019
-    def DTYPE_TO_FFINAME(self) -> dict[PolarsDataType, str]:
-        return {
-            Binary: "binary",
-            Boolean: "bool",
-            Categorical: "categorical",
-            Date: "date",
-            Datetime: "datetime",
-            Decimal: "decimal",
-            Duration: "duration",
-            Float32: "f32",
-            Float64: "f64",
-            Int8: "i8",
-            Int16: "i16",
-            Int32: "i32",
-            Int64: "i64",
-            Int128: "i128",
-            List: "list",
-            Object: "object",
-            String: "str",
-            Struct: "struct",
-            Time: "time",
-            UInt8: "u8",
-            UInt16: "u16",
-            UInt32: "u32",
-            UInt64: "u64",
-            UInt128: "u128",
-        }
-
-    @property
-    @functools.lru_cache  # noqa: B019
-    def DTYPE_TO_PY_TYPE(self) -> dict[PolarsDataType, PythonDataType]:
-        return {
-            Array: list,
-            Binary: bytes,
-            Boolean: bool,
-            Date: date,
-            Datetime: datetime,
-            Decimal: PyDecimal,
-            Duration: timedelta,
-            Float32: float,
-            Float64: float,
-            Int8: int,
-            Int16: int,
-            Int32: int,
-            Int64: int,
-            Int128: int,
-            List: list,
-            Null: None.__class__,
-            Object: object,
-            String: str,
-            Struct: dict,
-            Time: time,
-            UInt8: int,
-            UInt16: int,
-            UInt32: int,
-            UInt64: int,
-            UInt128: int,
-            # the below mappings are appropriate as we restrict cat/enum to strings
-            Enum: str,
-            Categorical: str,
-        }
-
-    @property
-    @functools.lru_cache  # noqa: B019
-    def NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE(self) -> dict[tuple[str, int], PolarsDataType]:
-        return {
-            # (np.dtype().kind, np.dtype().itemsize)
-            ("M", 8): Datetime,
-            ("b", 1): Boolean,
-            ("f", 2): Float32,
-            ("f", 4): Float32,
-            ("f", 8): Float64,
-            ("i", 1): Int8,
-            ("i", 2): Int16,
-            ("i", 4): Int32,
-            ("i", 8): Int64,
-            ("m", 8): Duration,
-            ("u", 1): UInt8,
-            ("u", 2): UInt16,
-            ("u", 4): UInt32,
-            ("u", 8): UInt64,
-        }
-
-    @property
-    @functools.lru_cache  # noqa: B019
-    def PY_TYPE_TO_ARROW_TYPE(self) -> dict[PythonDataType, pa.lib.DataType]:
-        return {
-            bool: pa.bool_(),
-            date: pa.date32(),
-            datetime: pa.timestamp("us"),
-            float: pa.float64(),
-            int: pa.int64(),
-            str: pa.large_utf8(),
-            time: pa.time64("us"),
-            timedelta: pa.duration("us"),
-            None.__class__: pa.null(),
-        }
-
-    @property
-    @functools.lru_cache  # noqa: B019
-    def REPR_TO_DTYPE(self) -> dict[str, PolarsDataType]:
-        def _dtype_str_repr_safe(o: Any) -> PolarsDataType | None:
-            try:
-                return _dtype_str_repr(o.base_type()).split("[")[0]  # type: ignore[return-value]
-            except TypeError:
-                return None
-
-        return {
-            _dtype_str_repr_safe(obj): obj  # type: ignore[misc]
-            for obj in globals().values()
-            if is_polars_dtype(obj) and _dtype_str_repr_safe(obj) is not None
-        }
-
-
-# Initialize once (poor man's singleton :)
-DataTypeMappings = _DataTypeMappings()
-
-
-def dtype_to_ffiname(dtype: PolarsDataType) -> str:
-    """Return FFI function name associated with the given Polars dtype."""
-    try:
-        dtype = dtype.base_type()
-        return DataTypeMappings.DTYPE_TO_FFINAME[dtype]
-    except KeyError:  # pragma: no cover
-        msg = f"conversion of polars data type {dtype!r} to FFI not implemented"
-        raise NotImplementedError(msg) from None
-
-
-def dtype_to_py_type(dtype: PolarsDataType) -> PythonDataType:
-    """Convert a Polars dtype to a Python dtype."""
-    try:
-        dtype = dtype.base_type()
-        return DataTypeMappings.DTYPE_TO_PY_TYPE[dtype]
-    except KeyError:  # pragma: no cover
-        msg = f"conversion of polars data type {dtype!r} to Python type not implemented"
-        raise NotImplementedError(msg) from None
-
-
-def py_type_to_arrow_type(dtype: PythonDataType) -> pa.lib.DataType:
-    """Convert a Python dtype to an Arrow dtype."""
-    try:
-        return DataTypeMappings.PY_TYPE_TO_ARROW_TYPE[dtype]
-    except KeyError:  # pragma: no cover
-        msg = f"cannot parse Python data type {dtype!r} into Arrow data type"
-        raise ValueError(msg) from None
-
-
-def dtype_short_repr_to_dtype(dtype_string: str | None) -> PolarsDataType | None:
-    """Map a PolarsDataType short repr (eg: 'i64', 'list[str]') back into a dtype."""
-    if dtype_string is None:
-        return None
-
-    m = re.match(r"^(\w+)(?:\[(.+)\])?$", dtype_string)
-    if m is None:
-        return None
-
-    dtype_base, subtype = m.groups()
-    dtype = DataTypeMappings.REPR_TO_DTYPE.get(dtype_base)
-    if dtype and subtype:
-        # TODO: further-improve handling for nested types (such as List,Struct)
-        try:
-            if dtype == Decimal:
-                subtype = (None, int(subtype))
-            else:
-                subtype = (
-                    s.strip("'\" ") for s in subtype.replace("μs", "us").split(",")
-                )
-            return dtype(*subtype)  # type: ignore[operator]
-        except ValueError:
-            pass
-    return dtype
-
-
-def supported_numpy_char_code(dtype_char: str) -> bool:
-    """Check if the input can be mapped to a Polars dtype."""
-    dtype = np.dtype(dtype_char)
-    return (
-        dtype.kind,
-        dtype.itemsize,
-    ) in DataTypeMappings.NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE
-
-
-def numpy_char_code_to_dtype(dtype_char: str) -> PolarsDataType:
-    """Convert a numpy character dtype to a Polars dtype."""
-    dtype = np.dtype(dtype_char)
-    if dtype.kind == "U":
-        return String
-    elif dtype.kind == "S":
-        return Binary
-    try:
-        return DataTypeMappings.NUMPY_KIND_AND_ITEMSIZE_TO_DTYPE[
-            dtype.kind, dtype.itemsize
-        ]
-    except KeyError:  # pragma: no cover
-        msg = f"cannot parse numpy data type {dtype!r} into Polars data type"
-        raise ValueError(msg) from None
-
-
-def maybe_cast(el: Any, dtype: PolarsDataType) -> Any:
-    """Try casting a value to a value that is valid for the given Polars dtype."""
-    # cast el if it doesn't match
-    from polars._utils.convert import (
-        datetime_to_int,
-        timedelta_to_int,
-    )
-
-    time_unit: TimeUnit
-    if isinstance(el, datetime):
-        time_unit = getattr(dtype, "time_unit", "us")
-        return datetime_to_int(el, time_unit)
-    elif isinstance(el, timedelta):
-        time_unit = getattr(dtype, "time_unit", "us")
-        return timedelta_to_int(el, time_unit)
-
-    py_type = dtype_to_py_type(dtype)
-    if not isinstance(el, py_type):
-        try:
-            el = py_type(el)  # type: ignore[call-arg]
-        except Exception:
-            from polars._utils.various import qualified_type_name
-
-            msg = f"cannot convert Python type {qualified_type_name(el)!r} to {dtype!r}"
-            raise TypeError(msg) from None
-    return el
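Among the helpers removed above, dtype_short_repr_to_dtype is the most self-contained: it splits a short repr such as 'list[str]' or 'datetime[us]' into a base name plus optional parameters with a regex, then instantiates the matching dtype. A sketch of just that parsing step, with tiny illustrative stand-ins rather than the real Polars dtype classes:

```python
from __future__ import annotations

import re
from dataclasses import dataclass


@dataclass
class Datetime:  # illustrative stand-in for the real Datetime dtype
    time_unit: str = "us"


@dataclass
class List:  # illustrative stand-in for the real List dtype
    inner: str | None = None


_REPR_TO_DTYPE = {"datetime": Datetime, "list": List}


def dtype_short_repr_to_dtype(dtype_string: str):
    """Parse 'name' or 'name[params]' into a dtype, mirroring the deleted helper."""
    m = re.match(r"^(\w+)(?:\[(.+)\])?$", dtype_string)
    if m is None:
        return None
    base, params = m.groups()
    dtype = _REPR_TO_DTYPE.get(base)
    if dtype is None:
        return None
    if params:
        args = (p.strip("'\" ") for p in params.split(","))
        return dtype(*args)
    return dtype()


print(dtype_short_repr_to_dtype("datetime[us]"))  # Datetime(time_unit='us')
print(dtype_short_repr_to_dtype("list[str]"))     # List(inner='str')
print(dtype_short_repr_to_dtype("unknown"))       # None
```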
polars/datatypes/group.py
DELETED
@@ -1,130 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-
-from polars.datatypes.classes import (
-    Array,
-    DataType,
-    DataTypeClass,
-    Date,
-    Datetime,
-    Decimal,
-    Duration,
-    Float32,
-    Float64,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    Int128,
-    List,
-    Struct,
-    Time,
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-    UInt128,
-)
-
-if TYPE_CHECKING:
-    import sys
-    from collections.abc import Iterable
-
-    from polars._typing import (
-        PolarsDataType,
-        PolarsIntegerType,
-        PolarsTemporalType,
-    )
-
-    if sys.version_info >= (3, 11):
-        from typing import Self
-    else:
-        from typing_extensions import Self
-
-
-class DataTypeGroup(frozenset):  # type: ignore[type-arg]
-    """Group of data types."""
-
-    _match_base_type: bool
-
-    def __new__(
-        cls, items: Iterable[DataType | DataTypeClass], *, match_base_type: bool = True
-    ) -> Self:
-        """
-        Construct a DataTypeGroup.
-
-        Parameters
-        ----------
-        items :
-            iterable of data types
-        match_base_type:
-            match the base type
-        """
-        for it in items:
-            if not isinstance(it, (DataType, DataTypeClass)):
-                from polars._utils.various import qualified_type_name
-
-                msg = f"DataTypeGroup items must be dtypes; found {qualified_type_name(it)!r}"
-                raise TypeError(msg)
-
-        dtype_group = super().__new__(cls, items)
-        dtype_group._match_base_type = match_base_type
-        return dtype_group
-
-    def __contains__(self, item: Any) -> bool:
-        if self._match_base_type and isinstance(item, (DataType, DataTypeClass)):
-            item = item.base_type()
-        return super().__contains__(item)
-
-
-SIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
-    [
-        Int8,
-        Int16,
-        Int32,
-        Int64,
-        Int128,
-    ]
-)
-UNSIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
-    [
-        UInt8,
-        UInt16,
-        UInt32,
-        UInt64,
-        UInt128,
-    ]
-)
-INTEGER_DTYPES: frozenset[PolarsIntegerType] = (
-    SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
-)
-FLOAT_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([Float32, Float64])
-NUMERIC_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
-    FLOAT_DTYPES | INTEGER_DTYPES | frozenset([Decimal])
-)
-
-DATETIME_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
-    [
-        Datetime,
-        Datetime("ms"),
-        Datetime("us"),
-        Datetime("ns"),
-        Datetime("ms", "*"),
-        Datetime("us", "*"),
-        Datetime("ns", "*"),
-    ]
-)
-DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
-    [
-        Duration,
-        Duration("ms"),
-        Duration("us"),
-        Duration("ns"),
-    ]
-)
-TEMPORAL_DTYPES: frozenset[PolarsTemporalType] = DataTypeGroup(
-    frozenset([Date, Time]) | DATETIME_DTYPES | DURATION_DTYPES
-)
-
-NESTED_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([List, Struct, Array])
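The behaviour that group.py adds over a plain frozenset is DataTypeGroup.__contains__: with match_base_type enabled, an instantiated dtype such as Datetime("ms") is normalised to its base type before the membership test, so parametrised dtypes still match their group. A self-contained sketch of that idea with a simplified stand-in class hierarchy (not the real Polars classes):

```python
from __future__ import annotations

from typing import Any, Iterable


class DataType:
    """Illustrative stand-in: instances carry parameters, the class is the base type."""

    def __init__(self, **params: Any) -> None:
        self.params = params


class Date(DataType): ...


class Datetime(DataType): ...


class DataTypeGroup(frozenset):
    """frozenset whose membership test normalises instances to their base type."""

    _match_base_type: bool

    def __new__(cls, items: Iterable[type], *, match_base_type: bool = True) -> "DataTypeGroup":
        group = super().__new__(cls, items)
        group._match_base_type = match_base_type
        return group

    def __contains__(self, item: Any) -> bool:
        if self._match_base_type and isinstance(item, DataType):
            item = type(item)  # e.g. Datetime(time_unit="ms") -> Datetime
        return super().__contains__(item)


TEMPORAL_DTYPES = DataTypeGroup([Date, Datetime])
print(Datetime(time_unit="ms") in TEMPORAL_DTYPES)  # True: instance matched via its base type
print(Datetime(time_unit="ms") in DataTypeGroup([Date, Datetime], match_base_type=False))  # False
```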
|