polars-runtime-compat 1.34.0b3__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b5__cp39-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/datatypes/classes.py
DELETED
|
@@ -1,1213 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import contextlib
|
|
4
|
-
import enum
|
|
5
|
-
from collections import OrderedDict
|
|
6
|
-
from collections.abc import Mapping
|
|
7
|
-
from datetime import tzinfo
|
|
8
|
-
from inspect import isclass
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar, overload
|
|
10
|
-
|
|
11
|
-
import polars._reexport as pl
|
|
12
|
-
import polars.datatypes
|
|
13
|
-
import polars.functions as F
|
|
14
|
-
|
|
15
|
-
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
16
|
-
import polars._plr as plr
|
|
17
|
-
from polars._plr import PyCategories
|
|
18
|
-
from polars._plr import dtype_str_repr as _dtype_str_repr
|
|
19
|
-
|
|
20
|
-
import polars.datatypes.classes as pldt
|
|
21
|
-
|
|
22
|
-
if TYPE_CHECKING:
|
|
23
|
-
from collections.abc import Iterable, Iterator, Sequence
|
|
24
|
-
|
|
25
|
-
from polars import Series
|
|
26
|
-
from polars._typing import (
|
|
27
|
-
CategoricalOrdering,
|
|
28
|
-
PolarsDataType,
|
|
29
|
-
PythonDataType,
|
|
30
|
-
SchemaDict,
|
|
31
|
-
TimeUnit,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# Generic type variables used by the helpers in this module.
T = TypeVar("T")
# `R` parametrises `classinstmethod` over the wrapped callable's return type.
R = TypeVar("R")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class classinstmethod(Generic[R]):
    """
    Descriptor that lets a method be invoked on the class OR on an instance.

    Looked up on an instance, the wrapped function is bound to that instance;
    looked up on the class, it is bound to the class itself.
    """

    def __init__(self, func: Callable[..., R]) -> None:
        self.func = func

    def __get__(self, instance: Any, type_: Any) -> Callable[..., R]:
        # Bind to the instance when there is one, otherwise to the owning class.
        receiver = type_ if instance is None else instance
        return self.func.__get__(receiver, type_)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
class DataTypeClass(type):
    """Metaclass that makes DataType classes print as their bare class name."""

    def __repr__(cls) -> str:
        return cls.__name__

    def _string_repr(cls) -> str:
        return _dtype_str_repr(cls)

    # The methods below are declared here in signature only, to satisfy mypy.

    @classmethod
    def base_type(cls) -> DataTypeClass: ...  # noqa: D102

    @classmethod
    def is_(cls, other: PolarsDataType) -> bool: ...  # noqa: D102

    @classmethod
    def is_numeric(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_decimal(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_integer(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_object(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_signed_integer(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_unsigned_integer(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_float(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_temporal(cls) -> bool: ...  # noqa: D102

    @classmethod
    def is_nested(cls) -> bool: ...  # noqa: D102

    @classmethod
    def from_python(cls, py_type: PythonDataType) -> PolarsDataType: ...  # noqa: D102

    @classmethod
    def to_python(cls) -> PythonDataType: ...  # noqa: D102

    @classmethod
    def to_dtype_expr(cls) -> pl.DataTypeExpr: ...  # noqa: D102
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class DataType(metaclass=DataTypeClass):
    """Root class from which every Polars data type derives."""

    def _string_repr(self) -> str:
        return _dtype_str_repr(self)

    @overload  # type: ignore[override]
    def __eq__(self, other: pl.DataTypeExpr) -> pl.Expr: ...

    @overload
    def __eq__(self, other: PolarsDataType) -> bool: ...

    def __eq__(self, other: pl.DataTypeExpr | PolarsDataType) -> pl.Expr | bool:
        # A dtype expression is compared against the expression form of this dtype.
        if isinstance(other, pl.DataTypeExpr):
            return self.to_dtype_expr() == other

        own_type = type(self)
        # An uninstantiated dtype class matches when it is this type or a subclass.
        if type(other) is DataTypeClass:
            return issubclass(other, own_type)
        return isinstance(other, own_type)

    def __hash__(self) -> int:
        return hash(self.__class__)

    def __repr__(self) -> str:
        return self.__class__.__name__

    @classmethod
    def base_type(cls) -> DataTypeClass:
        """
        Return the fundamental/root type class of this DataType.

        Examples
        --------
        >>> pl.Datetime("ns").base_type()
        Datetime
        >>> pl.List(pl.Int32).base_type()
        List
        >>> pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]).base_type()
        Struct
        """
        return cls

    @classinstmethod
    def is_(self, other: PolarsDataType) -> bool:
        """
        Check whether this DataType is the same as another DataType.

        Stricter than `self == other`: in addition to comparing equal, both
        sides must produce the same hash, which enforces an exact match of all
        dtype attributes for nested and/or uninitialised dtypes.

        Parameters
        ----------
        other
            the other Polars dtype to compare with.

        Examples
        --------
        >>> pl.List == pl.List(pl.Int32)
        True
        >>> pl.List.is_(pl.List(pl.Int32))
        False
        """
        are_equal = self == other
        return are_equal and hash(self) == hash(other)

    @classmethod
    def is_numeric(cls) -> bool:
        """Return whether the data type is a numeric type."""
        return issubclass(cls, NumericType)

    @classmethod
    def is_decimal(cls) -> bool:
        """Return whether the data type is a decimal type."""
        return issubclass(cls, Decimal)

    @classmethod
    def is_integer(cls) -> bool:
        """Return whether the data type is an integer type."""
        return issubclass(cls, IntegerType)

    @classmethod
    def is_object(cls) -> bool:
        """Return whether the data type is an object type."""
        return issubclass(cls, ObjectType)

    @classmethod
    def is_signed_integer(cls) -> bool:
        """Return whether the data type is a signed integer type."""
        return issubclass(cls, SignedIntegerType)

    @classmethod
    def is_unsigned_integer(cls) -> bool:
        """Return whether the data type is an unsigned integer type."""
        return issubclass(cls, UnsignedIntegerType)

    @classmethod
    def is_float(cls) -> bool:
        """Return whether the data type is a floating point type."""
        return issubclass(cls, FloatType)

    @classmethod
    def is_temporal(cls) -> bool:
        """Return whether the data type is a temporal type."""
        return issubclass(cls, TemporalType)

    @classmethod
    def is_nested(cls) -> bool:
        """Return whether the data type is a nested type."""
        return issubclass(cls, NestedType)

    @classmethod
    def from_python(cls, py_type: PythonDataType) -> PolarsDataType:
        """
        Return the Polars data type that corresponds to a given Python type.

        Notes
        -----
        Not every Python type has a corresponding Polars data type; in general
        you should declare Polars data types explicitly to exactly specify
        the desired type and its properties (such as scale/unit).

        Examples
        --------
        >>> pl.DataType.from_python(int)
        Int64
        >>> pl.DataType.from_python(float)
        Float64
        >>> from datetime import tzinfo
        >>> pl.DataType.from_python(tzinfo)  # doctest: +SKIP
        TypeError: cannot parse input <class 'datetime.tzinfo'> into Polars data type
        """
        # Imported at call time rather than at module import time.
        from polars.datatypes._parse import parse_into_dtype

        return parse_into_dtype(py_type)

    @classinstmethod
    def to_python(self) -> PythonDataType:
        """
        Return the Python type that corresponds to this Polars data type.

        Examples
        --------
        >>> pl.Int16().to_python()
        <class 'int'>
        >>> pl.Float32().to_python()
        <class 'float'>
        >>> pl.Array(pl.Date(), 10).to_python()
        <class 'list'>
        """
        # Imported at call time rather than at module import time.
        from polars.datatypes import dtype_to_py_type

        return dtype_to_py_type(self)

    @classinstmethod
    def to_dtype_expr(self) -> pl.DataTypeExpr:
        """
        Return a :class:`DataTypeExpr` with a static :class:`DataType`.

        Examples
        --------
        >>> pl.Int16().to_dtype_expr().collect_dtype({})
        Int16
        """
        from polars._plr import PyDataTypeExpr

        py_dtype_expr = PyDataTypeExpr.from_dtype(self)
        return pl.DataTypeExpr._from_pydatatype_expr(py_dtype_expr)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
class NumericType(DataType):
    """Common parent class of the numeric data types."""

    @classmethod
    def max(cls) -> pl.Expr:
        """
        Return a literal expression holding the maximum value of this data type.

        Examples
        --------
        >>> pl.select(pl.Int8.max() == 127)
        shape: (1, 1)
        ┌─────────┐
        │ literal │
        │ ---     │
        │ bool    │
        ╞═════════╡
        │ true    │
        └─────────┘
        """
        upper_bound = plr._get_dtype_max(cls)
        return pl.Expr._from_pyexpr(upper_bound)

    @classmethod
    def min(cls) -> pl.Expr:
        """
        Return a literal expression holding the minimum value of this data type.

        Examples
        --------
        >>> pl.select(pl.Int8.min() == -128)
        shape: (1, 1)
        ┌─────────┐
        │ literal │
        │ ---     │
        │ bool    │
        ╞═════════╡
        │ true    │
        └─────────┘
        """
        lower_bound = plr._get_dtype_min(cls)
        return pl.Expr._from_pyexpr(lower_bound)
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
class IntegerType(NumericType):
    """Common parent class of the integer data types."""
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
class SignedIntegerType(IntegerType):
    """Common parent class of the signed integer data types."""
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
class UnsignedIntegerType(IntegerType):
    """Common parent class of the unsigned integer data types."""
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
class FloatType(NumericType):
    """Common parent class of the float data types."""
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
class TemporalType(DataType):
    """Common parent class of the temporal data types."""
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
class NestedType(DataType):
    """Common parent class of the nested data types."""
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
class ObjectType(DataType):
    """Common parent class of the object data types."""
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
class Int8(SignedIntegerType):
    """Signed integer type, 8 bits wide."""
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
class Int16(SignedIntegerType):
    """Signed integer type, 16 bits wide."""
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
class Int32(SignedIntegerType):
    """Signed integer type, 32 bits wide."""
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
class Int64(SignedIntegerType):
    """Signed integer type, 64 bits wide."""
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
class Int128(SignedIntegerType):
    """
    Signed integer type, 128 bits wide.

    .. warning::
        This functionality is considered **unstable**.
        It is a work-in-progress feature and may not always work as expected.
        It may be changed at any point without it being considered a breaking change.
    """
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
class UInt8(UnsignedIntegerType):
    """Unsigned integer type, 8 bits wide."""
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
class UInt16(UnsignedIntegerType):
    """Unsigned integer type, 16 bits wide."""
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
class UInt32(UnsignedIntegerType):
    """Unsigned integer type, 32 bits wide."""
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
class UInt64(UnsignedIntegerType):
    """Unsigned integer type, 64 bits wide."""
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
class UInt128(UnsignedIntegerType):
    """
    Unsigned integer type, 128 bits wide.

    .. warning::
        This functionality is considered **unstable**.
        It is a work-in-progress feature and may not always work as expected.
        It may be changed at any point without it being considered a breaking change.
    """
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
class Float32(FloatType):
    """Floating point type, 32 bits wide."""
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
class Float64(FloatType):
    """Floating point type, 64 bits wide."""
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
class Decimal(NumericType):
    """
    128-bit decimal type with an optional precision and a non-negative scale.

    .. warning::
        This functionality is considered **unstable**.
        It is a work-in-progress feature and may not always work as expected.
        It may be changed at any point without it being considered a breaking change.

    Parameters
    ----------
    precision
        Maximum number of digits in each number.
        When `None` (the default), a precision of 38 is stored.
    scale
        Number of digits to the right of the decimal point in each number.
    """

    precision: int | None
    scale: int

    def __init__(
        self,
        precision: int | None = None,
        scale: int = 0,
    ) -> None:
        # Warning from `__init__` is not triggered when the class is used without
        # being instantiated, but that is still better than no warning at all.
        from polars._utils.unstable import issue_unstable_warning

        issue_unstable_warning(
            "the Decimal data type is considered unstable."
            " It is a work-in-progress feature and may not always work as expected."
        )

        self.precision = 38 if precision is None else precision
        self.scale = scale

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(precision={self.precision}, scale={self.scale})"

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # An instance compares equal to the bare Decimal class (or a subclass).
        if type(other) is DataTypeClass and issubclass(other, Decimal):
            return True
        if not isinstance(other, Decimal):
            return False
        return self.precision == other.precision and self.scale == other.scale

    def __hash__(self) -> int:
        identity = (self.__class__, self.precision, self.scale)
        return hash(identity)
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
class Boolean(DataType):
    """Data type for boolean values."""
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
class String(DataType):
    """Data type for UTF-8 encoded strings."""
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
# `Utf8` is kept as an alias that refers to the very same class as `String`.
Utf8 = String
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
class Binary(DataType):
    """Data type for binary values."""
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
class Date(TemporalType):
    """
    Data type for a calendar date.

    Notes
    -----
    Internally a date is held as a 32-bit signed integer counting the number
    of days since the Unix epoch (1970-01-01). Dates before the epoch are
    represented by negative numbers.
    """
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
class Time(TemporalType):
    """
    Data type for the time of day.

    Notes
    -----
    Internally a time is held as a 64-bit signed integer counting the number
    of nanoseconds since midnight.
    """

    @classmethod
    def max(cls) -> pl.Expr:
        """
        Return a literal expression holding the maximum value of this data type.

        Examples
        --------
        >>> pl.select(pl.Time.max() == 86_399_999_999_999)
        shape: (1, 1)
        ┌─────────┐
        │ literal │
        │ ---     │
        │ bool    │
        ╞═════════╡
        │ true    │
        └─────────┘
        """
        upper_bound = plr._get_dtype_max(cls)
        return pl.Expr._from_pyexpr(upper_bound)

    @classmethod
    def min(cls) -> pl.Expr:
        """
        Return a literal expression holding the minimum value of this data type.

        Examples
        --------
        >>> pl.select(pl.Time.min() == 0)
        shape: (1, 1)
        ┌─────────┐
        │ literal │
        │ ---     │
        │ bool    │
        ╞═════════╡
        │ true    │
        └─────────┘
        """
        lower_bound = plr._get_dtype_min(cls)
        return pl.Expr._from_pyexpr(lower_bound)
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
class Datetime(TemporalType):
    """
    Data type for a calendar date combined with a time of day.

    Parameters
    ----------
    time_unit : {'us', 'ns', 'ms'}
        Unit of time. Defaults to `'us'` (microseconds).
    time_zone
        Time zone string, as defined in zoneinfo (to see valid strings run
        `import zoneinfo; zoneinfo.available_timezones()` for a full list).
        A `tzinfo` object is also accepted and is stored as `str(time_zone)`.
        When used to match dtypes, can set this to "*" to check for Datetime
        columns that have any (non-null) timezone.

    Notes
    -----
    Internally a datetime is held as a 64-bit signed integer counting the
    number of time units since the Unix epoch (1970-01-01 00:00:00).
    Datetimes before the epoch are represented by negative numbers.
    """

    time_unit: TimeUnit
    time_zone: str | None

    def __init__(
        self, time_unit: TimeUnit = "us", time_zone: str | tzinfo | None = None
    ) -> None:
        unit_is_valid = time_unit in ("ms", "us", "ns")
        if not unit_is_valid:
            msg = (
                "invalid `time_unit`"
                f"\n\nExpected one of {{'ns','us','ms'}}, got {time_unit!r}."
            )
            raise ValueError(msg)

        self.time_unit = time_unit
        # `tzinfo` objects are stored by their string form.
        self.time_zone = (
            str(time_zone) if isinstance(time_zone, tzinfo) else time_zone
        )

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # An instance compares equal to the bare Datetime class (or a subclass).
        if type(other) is DataTypeClass and issubclass(other, Datetime):
            return True
        if not isinstance(other, Datetime):
            return False
        return self.time_unit == other.time_unit and self.time_zone == other.time_zone

    def __hash__(self) -> int:
        identity = (self.__class__, self.time_unit, self.time_zone)
        return hash(identity)

    def __repr__(self) -> str:
        unit, zone = self.time_unit, self.time_zone
        return f"{self.__class__.__name__}(time_unit={unit!r}, time_zone={zone!r})"
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
class Duration(TemporalType):
    """
    Data type for a time duration.

    Parameters
    ----------
    time_unit : {'us', 'ns', 'ms'}
        Unit of time. Defaults to `'us'` (microseconds).

    Notes
    -----
    Internally a duration is held as a 64-bit signed integer counting an
    amount of time units. Negative time offsets are represented by negative
    numbers.
    """

    time_unit: TimeUnit

    def __init__(self, time_unit: TimeUnit = "us") -> None:
        unit_is_valid = time_unit in ("ms", "us", "ns")
        if not unit_is_valid:
            msg = (
                "invalid `time_unit`"
                f"\n\nExpected one of {{'ns','us','ms'}}, got {time_unit!r}."
            )
            raise ValueError(msg)

        self.time_unit = time_unit

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # An instance compares equal to the bare Duration class (or a subclass).
        if type(other) is DataTypeClass and issubclass(other, Duration):
            return True
        if not isinstance(other, Duration):
            return False
        return self.time_unit == other.time_unit

    def __hash__(self) -> int:
        identity = (self.__class__, self.time_unit)
        return hash(identity)

    def __repr__(self) -> str:
        unit = self.time_unit
        return f"{self.__class__.__name__}(time_unit={unit!r})"
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
class Categories:
    """
    A named collection of categories for `Categorical`.

    Two categories are considered equal (and will use the same physical mapping of
    categories to strings) if they have the same name, namespace and physical backing
    type, even if they are created in separate calls to `Categories`.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    name
        Name of the categories. `None` or an empty string refers to the
        global categories.
    namespace
        Namespace of the categories. Must be left empty for the global categories.
    physical
        Physical backing type; one of `pl.UInt8`, `pl.UInt16` or `pl.UInt32`.
        Must be left as `pl.UInt32` for the global categories.
    """

    _categories: PyCategories

    # Supported physical dtypes paired with the string codes exchanged with
    # `PyCategories`. Shared by `__init__`, `random` and `physical` so the
    # mapping is defined in exactly one place.
    _PHYSICAL_CODES = (
        (pldt.UInt32, "u32"),
        (pldt.UInt16, "u16"),
        (pldt.UInt8, "u8"),
    )

    def __init__(
        self,
        name: str | None = None,
        namespace: str = "",
        physical: PolarsDataType = pldt.UInt32,
    ) -> None:
        if name is None or name == "":
            # These checks are raised explicitly (rather than via `assert`) so
            # they are not stripped when Python runs with `-O`. The exception
            # type stays `AssertionError` for backward compatibility.
            if not (namespace == ""):
                msg = "global categories may not specify a namespace"
                raise AssertionError(msg)
            if not (physical == pldt.UInt32):
                msg = "global categories may not specify a physical type"
                raise AssertionError(msg)
            self._categories = PyCategories.global_categories()
            return

        internal_phys = Categories._physical_code(physical)
        self._categories = PyCategories(name, namespace, internal_phys)

    @staticmethod
    def _physical_code(physical: PolarsDataType) -> str:
        """Translate a physical dtype into its `PyCategories` string code."""
        for dtype, code in Categories._PHYSICAL_CODES:
            if physical == dtype:
                return code
        msg = "Categorical physical must be one of pl.UInt(8|16|32)"
        raise TypeError(msg)

    @staticmethod
    def _from_py_categories(py_categories: PyCategories) -> Categories:
        # Bypass `__init__`: the underlying object already exists.
        self = Categories.__new__(Categories)
        self._categories = py_categories
        return self

    @staticmethod
    def random(
        namespace: str = "", physical: PolarsDataType = pldt.UInt32
    ) -> Categories:
        """Creates a new Categories with a random name."""
        internal_phys = Categories._physical_code(physical)
        return Categories._from_py_categories(
            PyCategories.random(namespace, internal_phys)
        )

    def name(self) -> str:
        """The name of this `Categories`."""
        return self._categories.name()

    def namespace(self) -> str:
        """The namespace of this `Categories`."""
        return self._categories.namespace()

    def physical(self) -> PolarsDataType:
        """The physical type used to represent the categories."""
        phys = self._categories.physical()
        for dtype, code in self._PHYSICAL_CODES:
            if phys == code:
                return dtype
        msg = "unknown physical dtype"
        raise RuntimeError(msg)

    def is_global(self) -> bool:
        """Returns whether this refers to the global categories."""
        return self._categories.is_global()

    def __getitem__(self, key: str | int | None) -> str | int | None:
        # `None` maps to `None`; a string is looked up via `get_cat`, and any
        # other key is passed to `cat_to_str`.
        if key is None:
            return key
        elif isinstance(key, str):
            return self._categories.get_cat(key)
        else:
            return self._categories.cat_to_str(key)

    def __repr__(self) -> str:
        if self._categories.is_global():
            return "Categories()"

        name = self.name()
        namespace = self.namespace()
        phys = self.physical()
        # Use the short form when only the name differs from the defaults.
        if namespace == "" and phys == pldt.UInt32:
            return f'Categories("{name}")'
        return f'Categories(name="{name}", namespace="{namespace}", physical=pl.{phys})'

    def __hash__(self) -> int:
        return hash(self._categories)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Categories) and self._categories == other._categories
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
class Categorical(DataType):
    """
    A categorical encoding of a set of strings.

    Parameters
    ----------
    ordering : {'lexical', 'physical'}
        Previously selected ordering by string value (`'lexical'`) or by
        order of appearance (`'physical'`). A `Categories` instance may also
        be passed in this position and is then used as the categories.

        .. deprecated:: 1.32.0
            Parameter is now ignored. Always behaves as if `'lexical'` was passed.
    **kwargs
        Only `categories` is accepted. When it is omitted or `None`, the
        global `Categories()` are used.
    """

    ordering: CategoricalOrdering | None
    categories: Categories

    def __init__(
        self,
        ordering: CategoricalOrdering | Categories | None = "lexical",
        **kwargs: Any,
    ) -> None:
        # Keyword validation below raises explicitly instead of using `assert`
        # so it still runs under `python -O`; `AssertionError` is kept as the
        # exception type for backward compatibility.

        # Future API will be this, already support it for backwards compat.
        if isinstance(ordering, Categories):
            if kwargs:
                msg = f"unexpected keyword arguments: {sorted(kwargs)}"
                raise AssertionError(msg)
            self.ordering = "lexical"
            self.categories = ordering
            return

        if ordering == "physical":
            from polars._utils.deprecation import issue_deprecation_warning

            issue_deprecation_warning(
                "the physical Categorical ordering is deprecated. The ordering is now always lexical.",
                version="1.32.0",
            )

        self.ordering = "lexical"
        categories = kwargs.get("categories")
        if categories is not None:
            if len(kwargs) != 1:
                extra = sorted(k for k in kwargs if k != "categories")
                msg = f"unexpected keyword arguments: {extra}"
                raise AssertionError(msg)
            self.categories = categories
        else:
            # Also reached for an explicit `categories=None`, which has
            # always been rejected here.
            if kwargs:
                msg = f"unexpected keyword arguments: {sorted(kwargs)}"
                raise AssertionError(msg)
            self.categories = Categories()

    def __repr__(self) -> str:
        if self.categories.is_global():
            return f"{self.__class__.__name__}"
        else:
            return f"{self.__class__.__name__}({self.categories!r})"

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # allow comparing object instances to class; the bare class only
        # matches instances that use the global categories
        if type(other) is DataTypeClass and issubclass(other, Categorical):
            return self.categories.is_global()
        elif isinstance(other, Categorical):
            return self.categories == other.categories
        else:
            return False

    def __hash__(self) -> int:
        return hash((self.__class__, self.categories))
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
class Enum(DataType):
    """
    A fixed categorical encoding of a unique set of strings.

    Parameters
    ----------
    categories
        The categories in the dataset; must be a unique set of strings, or an
        existing Python string-valued enum.

    Examples
    --------
    Explicitly define enumeration categories:

    >>> pl.Enum(["north", "south", "east", "west"])
    Enum(categories=['north', 'south', 'east', 'west'])

    Initialise from an existing Python enumeration:

    >>> from http import HTTPMethod
    >>> pl.Enum(HTTPMethod)
    Enum(categories=['CONNECT', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'PATCH', 'POST', 'PUT', 'TRACE'])
    """  # noqa: W505

    categories: Series

    def __init__(self, categories: Series | Iterable[str] | type[enum.Enum]) -> None:
        # Step 1: normalise the input into a Series.
        if isclass(categories) and issubclass(categories, enum.Enum):
            # Integer-valued enum flavours cannot supply string categories.
            if issubclass(categories, (enum.Flag, enum.IntEnum)):
                enum_type_name = categories.__name__
                msg = f"Enum categories must be strings; `{enum_type_name}` values are integers"
                raise TypeError(msg)

            member_values = [
                getattr(member, "value", member)
                for member in categories.__members__.values()
            ]
            categories = pl.Series(values=member_values)
        elif not isinstance(categories, pl.Series):
            categories = pl.Series(values=categories)

        # Step 2: an empty input becomes an empty String series named "category".
        if categories.is_empty():
            self.categories = pl.Series(name="category", dtype=String)
            return

        # Step 3: reject nulls, non-string data and duplicates, in that order.
        if categories.has_nulls():
            msg = "Enum categories must not contain null values"
            raise TypeError(msg)

        dtype = categories.dtype
        if dtype != String:
            msg = f"Enum categories must be strings; found data of type {dtype}"
            raise TypeError(msg)

        if categories.n_unique() != categories.len():
            duplicate = categories.filter(categories.is_duplicated())[0]
            msg = f"Enum categories must be unique; found duplicate {duplicate!r}"
            raise ValueError(msg)

        self.categories = categories.rechunk().alias("category")

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # The bare `Enum` class (or a subclass) matches every instance.
        if type(other) is DataTypeClass and issubclass(other, Enum):
            return True
        if isinstance(other, Enum):
            return self.categories.equals(other.categories)
        return False

    def __hash__(self) -> int:
        category_values = tuple(self.categories)
        return hash((self.__class__, category_values))

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(categories={self.categories.to_list()!r})"

    def union(self, other: Enum) -> Enum:
        """Union of two Enums."""
        combined = F.concat((self.categories, other.categories))
        return Enum(combined.unique(maintain_order=True))

    __or__ = union
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
class Object(ObjectType):
    """Data type that wraps arbitrary Python objects."""
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
class Null(DataType):
    """Data type that represents null values."""
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
class Unknown(DataType):
    """Type standing in for DataType values that could not be determined statically."""
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
class List(NestedType):
    """
    Variable length list type.

    Parameters
    ----------
    inner
        The `DataType` of the values within each list.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "integer_lists": [[1, 2], [3, 4]],
    ...         "float_lists": [[1.0, 2.0], [3.0, 4.0]],
    ...     }
    ... )
    >>> df
    shape: (2, 2)
    ┌───────────────┬─────────────┐
    │ integer_lists ┆ float_lists │
    │ ---           ┆ ---         │
    │ list[i64]     ┆ list[f64]   │
    ╞═══════════════╪═════════════╡
    │ [1, 2]        ┆ [1.0, 2.0]  │
    │ [3, 4]        ┆ [3.0, 4.0]  │
    └───────────────┴─────────────┘
    """

    inner: PolarsDataType

    def __init__(self, inner: PolarsDataType | PythonDataType) -> None:
        # `inner` is passed through `parse_into_dtype` before being stored.
        parsed_inner = polars.datatypes.parse_into_dtype(inner)
        self.inner = parsed_inner

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # Equality works across type classes and type instances. A side that
        # does not pin down its inner type is treated as matching:
        # > list[i64] == list[i64] -> True
        # > list[i64] == list[f32] -> False
        # > list[i64] == list      -> True

        # the bare class (or a subclass) matches any instance
        if type(other) is DataTypeClass and issubclass(other, List):
            return True
        if isinstance(other, List):
            return self.inner == other.inner
        return False

    def __hash__(self) -> int:
        identity = (self.__class__, self.inner)
        return hash(identity)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.inner!r})"
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
class Array(NestedType):
    """
    Fixed length list type.

    Parameters
    ----------
    inner
        The `DataType` of the values within each array.
    shape
        The shape of the arrays.
    width
        The length of the arrays.

        .. deprecated:: 0.20.31
            The `width` parameter for `Array` is deprecated. Use `shape` instead.

    Examples
    --------
    >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
    >>> s
    shape: (2,)
    Series: 'a' [array[i64, 2]]
    [
            [1, 2]
            [4, 3]
    ]
    """

    inner: PolarsDataType
    size: int
    shape: tuple[int, ...]

    def __init__(
        self,
        inner: PolarsDataType | PythonDataType,
        shape: int | tuple[int, ...] | None = None,
        *,
        width: int | None = None,
    ) -> None:
        if width is not None:
            from polars._utils.deprecation import issue_deprecation_warning

            issue_deprecation_warning(
                "the `width` parameter for `Array` is deprecated. Use `shape` instead.",
                version="0.20.31",
            )
            # The deprecated `width` takes the place of `shape`.
            shape = width
        elif shape is None:
            msg = "Array constructor is missing the required argument `shape`"
            raise TypeError(msg)

        inner_parsed = polars.datatypes.parse_into_dtype(inner)
        # When the inner dtype is itself an Array, its dimensions are appended
        # to this array's shape.
        inner_shape = inner_parsed.shape if isinstance(inner_parsed, Array) else ()

        if isinstance(shape, int):
            self.inner = inner_parsed
            self.size = shape
            self.shape = (shape,) + inner_shape

        # The truthiness check rejects an empty tuple, which previously
        # escaped as an `IndexError` from `shape[0]` instead of the
        # `TypeError` raised for every other invalid shape.
        elif isinstance(shape, tuple) and shape and isinstance(shape[0], int):  # type: ignore[redundant-expr]
            if len(shape) > 1:
                # Remaining dimensions become nested Array dtypes.
                inner_parsed = Array(inner_parsed, shape[1:])

            self.inner = inner_parsed
            self.size = shape[0]
            self.shape = shape + inner_shape

        else:
            msg = f"invalid input for shape: {shape!r}"
            raise TypeError(msg)

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # This equality check allows comparison of type classes and type instances.
        # If a parent type is not specific about its inner type, we infer it as equal:
        # > array[i64] == array[i64] -> True
        # > array[i64] == array[f32] -> False
        # > array[i64] == array      -> True

        # allow comparing object instances to class
        if type(other) is DataTypeClass and issubclass(other, Array):
            return True
        elif isinstance(other, Array):
            if self.shape != other.shape:
                return False
            else:
                return self.inner == other.inner
        else:
            return False

    def __hash__(self) -> int:
        return hash((self.__class__, self.inner, self.size))

    def __repr__(self) -> str:
        # Get leaf type
        dtype = self.inner
        while isinstance(dtype, Array):
            dtype = dtype.inner

        class_name = self.__class__.__name__
        return f"{class_name}({dtype!r}, shape={self.shape})"

    @property
    def width(self) -> int:
        """The size of the Array."""
        from polars._utils.deprecation import issue_deprecation_warning

        issue_deprecation_warning(
            "the `width` attribute for `Array` is deprecated. Use `size` instead.",
            version="0.20.31",
        )
        return self.size
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
class Field:
    """
    Definition of a single field within a `Struct` DataType.

    Parameters
    ----------
    name
        The name of the field within its parent `Struct`.
    dtype
        The `DataType` of the field's values.
    """

    name: str
    dtype: PolarsDataType

    def __init__(self, name: str, dtype: PolarsDataType) -> None:
        self.name = name
        # `dtype` is passed through `parse_into_dtype` before being stored.
        self.dtype = polars.datatypes.parse_into_dtype(dtype)

    def __eq__(self, other: object) -> bool:
        """
        Return whether `other` is a `Field` with the same name and dtype.

        Comparing against anything that is not a `Field` yields
        `NotImplemented`, so `field == 5` evaluates to `False` instead of
        raising `AttributeError`.
        """
        if not isinstance(other, Field):
            return NotImplemented
        return self.name == other.name and self.dtype == other.dtype

    def __hash__(self) -> int:
        return hash((self.name, self.dtype))

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}({self.name!r}, {self.dtype})"
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
class Struct(NestedType):
    """
    Struct composite type.

    Parameters
    ----------
    fields
        The fields that make up the struct. Can be either a sequence of Field
        objects or a mapping of column names to data types.

    Examples
    --------
    Initialize using a dictionary:

    >>> dtype = pl.Struct({"a": pl.Int8, "b": pl.List(pl.String)})
    >>> dtype
    Struct({'a': Int8, 'b': List(String)})

    Initialize using a list of Field objects:

    >>> dtype = pl.Struct([pl.Field("a", pl.Int8), pl.Field("b", pl.List(pl.String))])
    >>> dtype
    Struct({'a': Int8, 'b': List(String)})

    When initializing a Series, Polars can infer a struct data type from the data.

    >>> s = pl.Series([{"a": 1, "b": ["x", "y"]}, {"a": 2, "b": ["z"]}])
    >>> s
    shape: (2,)
    Series: '' [struct[2]]
    [
            {1,["x", "y"]}
            {2,["z"]}
    ]
    >>> s.dtype
    Struct({'a': Int64, 'b': List(String)})
    """

    fields: list[Field]

    def __init__(self, fields: Sequence[Field] | SchemaDict) -> None:
        # A mapping is expanded into Field objects; any other input is
        # copied into a list as-is.
        self.fields = (
            [Field(name, dtype) for name, dtype in fields.items()]
            if isinstance(fields, Mapping)
            else list(fields)
        )

    def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
        # Objects may be compared to classes: the bare `Struct` class (or a
        # subclass) carries no field information and is treated as equal to
        # any instance. Two instances are equal when their field lists are.
        if isclass(other) and issubclass(other, Struct):
            return True
        if isinstance(other, Struct):
            return self.fields == other.fields
        return False

    def __hash__(self) -> int:
        frozen_fields = tuple(self.fields)
        return hash((self.__class__, frozen_fields))

    def __iter__(self) -> Iterator[tuple[str, PolarsDataType]]:
        yield from ((field.name, field.dtype) for field in self.fields)

    def __reversed__(self) -> Iterator[tuple[str, PolarsDataType]]:
        yield from ((field.name, field.dtype) for field in reversed(self.fields))

    def __repr__(self) -> str:
        # Iterating the struct yields (name, dtype) pairs, so `dict(self)`
        # builds the name -> dtype mapping shown in the repr.
        return f"{self.__class__.__name__}({dict(self)})"

    def to_schema(self) -> OrderedDict[str, PolarsDataType]:
        """Return Struct dtype as a schema dict."""
        schema = OrderedDict(self)
        return schema
|