polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/schema.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import sys
|
|
5
|
+
from collections import OrderedDict
|
|
6
|
+
from collections.abc import Mapping
|
|
7
|
+
from typing import TYPE_CHECKING, Literal, Union, overload
|
|
8
|
+
|
|
9
|
+
from polars._typing import PythonDataType
|
|
10
|
+
from polars._utils.unstable import unstable
|
|
11
|
+
from polars.datatypes import DataType, DataTypeClass, is_polars_dtype
|
|
12
|
+
from polars.datatypes._parse import parse_into_dtype
|
|
13
|
+
from polars.exceptions import DuplicateError
|
|
14
|
+
from polars.interchange.protocol import CompatLevel
|
|
15
|
+
|
|
16
|
+
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
17
|
+
from polars._plr import (
|
|
18
|
+
init_polars_schema_from_arrow_c_schema,
|
|
19
|
+
polars_schema_field_from_arrow_c_schema,
|
|
20
|
+
polars_schema_to_pycapsule,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from collections.abc import Iterable
|
|
25
|
+
|
|
26
|
+
from polars import DataFrame, LazyFrame
|
|
27
|
+
from polars._typing import ArrowSchemaExportable
|
|
28
|
+
|
|
29
|
+
if sys.version_info >= (3, 10):
|
|
30
|
+
from typing import TypeAlias
|
|
31
|
+
else:
|
|
32
|
+
from typing_extensions import TypeAlias
|
|
33
|
+
|
|
34
|
+
if sys.version_info < (3, 10):

    def _required_init_args(tp: DataTypeClass) -> bool:
        """
        Report whether the dtype class `tp` needs arguments to be instantiated.

        Python 3.9 variant: a class that defines `__init__` directly in its own
        `__dict__` (i.e. overrides the default initialiser) is treated as
        requiring specific arguments.
        """
        return "__init__" in tp.__dict__

else:

    def _required_init_args(tp: DataTypeClass) -> bool:
        """
        Report whether the dtype class `tp` needs arguments to be instantiated.

        Python 3.10+ variant: a class with non-empty `__annotations__` is
        treated as requiring specific arguments.
        """
        # note: checking the annotations is ~20% faster than checking for a
        # custom "__init__", but this approach is not available on py39
        return bool(tp.__annotations__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Container type that `Schema` subclasses: an ordered dict keyed by `str` with `DataType` values.
BaseSchema = OrderedDict[str, DataType]
|
|
49
|
+
# Dtype forms accepted as values when building a `Schema`: a dtype instance,
# a dtype class, or a Python type (see the `Schema.__init__` annotations).
SchemaInitDataType: TypeAlias = Union[DataType, DataTypeClass, PythonDataType]
|
|
50
|
+
|
|
51
|
+
# `Schema` is the only name exported by a star-import of this module.
__all__ = ["Schema"]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _check_dtype(tp: DataType | DataTypeClass) -> DataType:
    """
    Return `tp` as a dtype instance, instantiating it if a dtype class was given.

    Parameters
    ----------
    tp
        Either an already-instantiated dtype (returned unchanged) or a dtype
        class.

    Raises
    ------
    TypeError
        If `tp` is a dtype class that is nested, decimal, or otherwise flagged
        by `_required_init_args` as needing constructor arguments.
    """
    # instances are already fully-specified; nothing to validate
    if isinstance(tp, DataType):
        return tp

    # note: if nested/decimal, or has signature params, this implies required args
    needs_args = tp.is_nested() or tp.is_decimal() or _required_init_args(tp)
    if needs_args:
        msg = f"dtypes must be fully-specified, got: {tp!r}"
        raise TypeError(msg)

    return tp()  # type: ignore[return-value]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Schema(BaseSchema):
    """
    Ordered mapping of column names to their data type.

    Parameters
    ----------
    schema
        The schema definition given by column names and their associated
        Polars data type. Accepts a mapping, or an iterable of tuples, or any
        object implementing the `__arrow_c_schema__` PyCapsule interface
        (e.g. pyarrow schemas).
    check_dtypes
        When True (the default), each dtype is validated before being stored.
        When False, the given values are stored as-is. Not consulted when the
        whole `schema` is imported through `__arrow_c_schema__`.

    Examples
    --------
    Define a schema by passing instantiated data types.

    >>> schema = pl.Schema(
    ...     {
    ...         "foo": pl.String(),
    ...         "bar": pl.Duration("us"),
    ...         "baz": pl.Array(pl.Int8, 4),
    ...     }
    ... )
    >>> schema
    Schema({'foo': String, 'bar': Duration(time_unit='us'), 'baz': Array(Int8, shape=(4,))})

    Access the data type associated with a specific column name.

    >>> schema["baz"]
    Array(Int8, shape=(4,))

    Access various schema properties using the `names`, `dtypes`, and `len` methods.

    >>> schema.names()
    ['foo', 'bar', 'baz']
    >>> schema.dtypes()
    [String, Duration(time_unit='us'), Array(Int8, shape=(4,))]
    >>> schema.len()
    3

    Import a pyarrow schema.

    >>> import pyarrow as pa
    >>> pl.Schema(pa.schema([pa.field("x", pa.int32())]))
    Schema({'x': Int32})

    Export a schema to pyarrow.

    >>> pa.schema(pl.Schema({"x": pl.Int32}))
    x: int32
    """  # noqa: W505

    def __init__(
        self,
        schema: (
            Mapping[str, SchemaInitDataType]
            | Iterable[tuple[str, SchemaInitDataType] | ArrowSchemaExportable]
            | ArrowSchemaExportable
            | None
        ) = None,
        *,
        check_dtypes: bool = True,
    ) -> None:
        # A foreign Arrow-exportable schema is imported wholesale; a Schema
        # (which also exposes `__arrow_c_schema__`) takes the mapping path below.
        exports_arrow = hasattr(schema, "__arrow_c_schema__")
        if exports_arrow and not isinstance(schema, Schema):
            init_polars_schema_from_arrow_c_schema(self, schema)
            return

        if isinstance(schema, Mapping):
            entries = schema.items()
        else:
            entries = schema or ()

        for entry in entries:
            # Each entry is either an Arrow-exportable field or a (name, dtype) pair.
            if hasattr(entry, "__arrow_c_schema__") and not isinstance(entry, DataType):
                name, tp = polars_schema_field_from_arrow_c_schema(entry)
            else:
                name, tp = entry

            if name in self:
                msg = f"iterable passed to pl.Schema contained duplicate name '{name}'"
                raise DuplicateError(msg)

            if not check_dtypes:
                # caller opted out of validation: store the value untouched
                super().__setitem__(name, tp)  # type: ignore[assignment]
                continue

            if is_polars_dtype(tp):
                # already a Polars dtype: validate only, no parsing required
                super().__setitem__(name, _check_dtype(tp))
            else:
                # anything else goes through `__setitem__`, which parses it first
                self[name] = tp

    def __eq__(self, other: object) -> bool:
        # Only a mapping with the same number of entries can compare equal.
        if not isinstance(other, Mapping) or len(self) != len(other):
            return False

        # Compare position by position: both the name and the dtype must agree.
        mismatch = any(
            own_name != other_name or not own_tp.is_(other_tp)
            for (own_name, own_tp), (other_name, other_tp) in zip(
                self.items(), other.items()
            )
        )
        return not mismatch

    def __ne__(self, other: object) -> bool:
        is_equal = self.__eq__(other)
        return not is_equal

    def __setitem__(
        self, name: str, dtype: DataType | DataTypeClass | PythonDataType
    ) -> None:
        # Parse the given dtype, then make sure it is fully-specified.
        parsed = parse_into_dtype(dtype)
        super().__setitem__(name, _check_dtype(parsed))

    @unstable()
    def __arrow_c_schema__(self) -> object:
        """
        Export a Schema via the Arrow PyCapsule Interface.

        https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html
        """
        compat_version = CompatLevel.newest()._version
        return polars_schema_to_pycapsule(self, compat_version)

    def names(self) -> list[str]:
        """
        Get the column names of the schema.

        Examples
        --------
        >>> s = pl.Schema({"x": pl.Float64(), "y": pl.Datetime(time_zone="UTC")})
        >>> s.names()
        ['x', 'y']
        """
        return [*self.keys()]

    def dtypes(self) -> list[DataType]:
        """
        Get the data types of the schema.

        Examples
        --------
        >>> s = pl.Schema({"x": pl.UInt8(), "y": pl.List(pl.UInt8)})
        >>> s.dtypes()
        [UInt8, List(UInt8)]
        """
        return [*self.values()]

    @overload
    def to_frame(self, *, eager: Literal[False]) -> LazyFrame: ...

    @overload
    def to_frame(self, *, eager: Literal[True] = ...) -> DataFrame: ...

    def to_frame(self, *, eager: bool = True) -> DataFrame | LazyFrame:
        """
        Create an empty DataFrame (or LazyFrame) from this Schema.

        Parameters
        ----------
        eager
            If True, create a DataFrame; otherwise, create a LazyFrame.

        Examples
        --------
        >>> s = pl.Schema({"x": pl.Int32(), "y": pl.String()})
        >>> s.to_frame()
        shape: (0, 2)
        ┌─────┬─────┐
        │ x   ┆ y   │
        │ --- ┆ --- │
        │ i32 ┆ str │
        ╞═════╪═════╡
        └─────┴─────┘
        >>> s.to_frame(eager=False)  # doctest: +IGNORE_RESULT
        <LazyFrame at 0x11BC0AD80>
        """
        from polars import DataFrame, LazyFrame

        if eager:
            return DataFrame(schema=self)
        return LazyFrame(schema=self)

    def len(self) -> int:
        """
        Get the number of schema entries.

        Examples
        --------
        >>> s = pl.Schema({"x": pl.Int32(), "y": pl.List(pl.String)})
        >>> s.len()
        2
        >>> len(s)
        2
        """
        n_entries = len(self)
        return n_entries

    def to_python(self) -> dict[str, type]:
        """
        Return a dictionary of column names and Python types.

        Examples
        --------
        >>> s = pl.Schema(
        ...     {
        ...         "x": pl.Int8(),
        ...         "y": pl.String(),
        ...         "z": pl.Duration("us"),
        ...     }
        ... )
        >>> s.to_python()
        {'x': <class 'int'>, 'y': <class 'str'>, 'z': <class 'datetime.timedelta'>}
        """
        python_types: dict[str, type] = {}
        for column, dtype in self.items():
            python_types[column] = dtype.to_python()
        return python_types
|