polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
polars/ml/torch.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# mypy: disable-error-code="unused-ignore"
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from polars._utils.unstable import issue_unstable_warning
|
|
7
|
+
from polars.dataframe import DataFrame
|
|
8
|
+
from polars.expr import Expr
|
|
9
|
+
from polars.selectors import exclude
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
import sys
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
|
|
15
|
+
from torch import Tensor, memory_format
|
|
16
|
+
|
|
17
|
+
if sys.version_info >= (3, 11):
|
|
18
|
+
from typing import Self
|
|
19
|
+
else:
|
|
20
|
+
from typing_extensions import Self
|
|
21
|
+
try:
|
|
22
|
+
import torch
|
|
23
|
+
from torch.utils.data import TensorDataset
|
|
24
|
+
except ImportError:
|
|
25
|
+
msg = (
|
|
26
|
+
"Required package 'torch' not installed.\n"
|
|
27
|
+
"Please install it using the command `pip install torch`."
|
|
28
|
+
)
|
|
29
|
+
raise ImportError(msg) from None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = ["PolarsDataset"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PolarsDataset(TensorDataset):  # type: ignore[misc]
    """
    TensorDataset class specialized for use with Polars DataFrames.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

    Parameters
    ----------
    frame
        Polars DataFrame containing the data that will be retrieved as Tensors.
    label
        One or more column names or expressions that label the feature data; results
        in `(features,label)` tuples, where all non-label columns are considered
        to be features. If no label is designated then each returned item is a
        simple `(features,)` tuple containing all row elements.
    features
        One or more column names or expressions that represent the feature data.
        If not provided, all columns not designated as labels are considered to be
        features.

    Notes
    -----
    * Integer, slice, range, integer list/Tensor Dataset indexing is all supported.
    * Designating multi-element labels is also supported.

    Examples
    --------
    >>> from torch.utils.data import DataLoader
    >>> df = pl.DataFrame(
    ...     data=[
    ...         (0, 1, 1.5),
    ...         (1, 0, -0.5),
    ...         (2, 0, 0.0),
    ...         (3, 1, -2.25),
    ...     ],
    ...     schema=["lbl", "feat1", "feat2"],
    ...     orient="row",
    ... )

    Create a Dataset from a Polars DataFrame, standardising the dtype and
    separating the label/feature columns.

    >>> ds = df.to_torch("dataset", label="lbl", dtype=pl.Float32)
    >>> ds  # doctest: +IGNORE_RESULT
    <PolarsDataset [len:4, features:2, labels:1] at 0x156B033B0>
    >>> ds.features
    tensor([[ 1.0000,  1.5000],
            [ 0.0000, -0.5000],
            [ 0.0000,  0.0000],
            [ 1.0000, -2.2500]])
    >>> ds[0]
    (tensor([1.0000, 1.5000]), tensor(0.))

    The Dataset can be used standalone, or in conjunction with a DataLoader.

    >>> dl = DataLoader(ds, batch_size=2)
    >>> list(dl)
    [[tensor([[ 1.0000,  1.5000],
              [ 0.0000, -0.5000]]),
      tensor([0., 1.])],
     [tensor([[ 0.0000,  0.0000],
              [ 1.0000, -2.2500]]),
      tensor([2., 3.])]]

    Note that the label can be given as an expression as well as a column name,
    allowing for independent transform and dtype adjustment from the feature
    columns.

    >>> ds = df.to_torch(
    ...     "dataset",
    ...     dtype=pl.Float32,
    ...     label=(pl.col("lbl") * 8).cast(pl.Int16),
    ... )
    >>> ds[:2]
    (tensor([[ 1.0000,  1.5000],
             [ 0.0000, -0.5000]]), tensor([0, 8], dtype=torch.int16))
    """

    # Tensors exposed by the dataset: `(features,)` or `(features, labels)`.
    tensors: tuple[Tensor, ...]
    # Label tensor; None when no label was designated.
    labels: Tensor | None
    # Feature tensor built from the selected feature columns.
    features: Tensor

    def __init__(
        self,
        frame: DataFrame,
        *,
        label: str | Expr | Sequence[str | Expr] | None = None,
        features: str | Expr | Sequence[str | Expr] | None = None,
    ) -> None:
        issue_unstable_warning("`PolarsDataset` is considered unstable.")
        if isinstance(label, (str, Expr)):
            label = [label]

        # A Polars expression raises on a truthiness test, so check for `Expr`
        # first and only fall back to truthiness for names/sequences/None.
        # The same guard is used whether or not a label was designated.
        has_features = isinstance(features, Expr) or bool(features)

        label_frame: DataFrame | None = None
        if not label:
            # No label: every selected (or every available) column is a feature.
            feature_frame = frame.select(features) if has_features else frame
            self.features = feature_frame.to_torch()
            self.tensors = (self.features,)
            self.labels = None
        else:
            label_frame = frame.select(*label)
            # A single label is converted from a Series rather than from a
            # one-column frame; multiple labels are converted from the frame.
            self.labels = (  # type: ignore[attr-defined]
                label_frame if len(label) > 1 else label_frame.to_series()
            ).to_torch()

            # Without explicit features, use every column that is not a label.
            feature_frame = frame.select(
                features if has_features else exclude(label_frame.columns)
            )
            self.features = feature_frame.to_torch()
            self.tensors = (self.features, self.labels)  # type: ignore[assignment]

        self._n_labels = 0 if (label_frame is None) else label_frame.width
        self._n_features = feature_frame.width

    def __copy__(self) -> Self:
        """Return a shallow copy of this PolarsDataset."""
        # Build an instance from a placeholder frame (this goes through
        # `__init__`), then overwrite its state with references to this
        # dataset's tensors and counts.
        dummy_frame = DataFrame({"blank": [0]})
        dataset_copy = self.__class__(dummy_frame)
        for attr in (
            "tensors",
            "labels",
            "features",
            "_n_labels",
            "_n_features",
        ):
            setattr(dataset_copy, attr, getattr(self, attr))
        return dataset_copy

    def __repr__(self) -> str:
        """Return a string representation of the PolarsDataset."""
        return (
            f"<{type(self).__name__} "
            f"[len:{len(self)},"
            f" features:{self._n_features},"
            f" labels:{self._n_labels}"
            f"] at 0x{id(self):X}>"
        )

    def half(
        self,
        *,
        features: bool = True,
        labels: bool = True,
        memory_format: memory_format = torch.preserve_format,
    ) -> Self:
        """
        Return a copy of this PolarsDataset with the numeric data converted to f16.

        Parameters
        ----------
        features
            Convert feature data to half precision (f16).
        labels
            Convert label data to half precision (f16).
        memory_format
            Desired memory format for the modified tensors.

        Returns
        -------
        PolarsDataset
            A copy of this dataset; the original dataset's attributes are
            not reassigned.
        """
        ds = self.__copy__()
        if features:
            ds.features = self.features.to(torch.float16, memory_format=memory_format)
        if self.labels is not None:
            if labels:
                ds.labels = self.labels.to(torch.float16, memory_format=memory_format)
            # Rebuild the tuple so it references the (possibly converted) tensors.
            ds.tensors = (ds.features, ds.labels)  # type: ignore[assignment]
        else:
            ds.tensors = (ds.features,)
        return ds

    @property
    def schema(self) -> dict[str, torch.dtype | None]:
        """Return the features/labels schema."""
        return {
            "features": self.features.dtype,
            "labels": self.labels.dtype if self.labels is not None else None,
        }
|
polars/ml/utilities.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from polars import DataFrame
|
|
4
|
+
from polars._dependencies import numpy as np
|
|
5
|
+
from polars._typing import IndexOrder
|
|
6
|
+
from polars.datatypes import Array, List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def frame_to_numpy(
    df: DataFrame,
    *,
    writable: bool,
    target: str,
    order: IndexOrder = "fortran",
) -> np.ndarray[Any, Any]:
    """
    Export a DataFrame as a NumPy array, for subsequent use with Jax or PyTorch.

    Parameters
    ----------
    df
        The frame to convert.
    writable
        Forwarded to the underlying `to_numpy` call.
    target
        Name of the destination library; only used in error messages.
    order
        Index order forwarded to `DataFrame.to_numpy`. Not used when the frame
        consists of a single `Array` column, which is converted as a Series.

    Raises
    ------
    TypeError
        If the frame contains a `List` column, or if the conversion yields an
        array with `object` dtype.
    """
    schema = df.schema
    for col_name, dtype in schema.items():
        if dtype == List:
            msg = f"cannot convert List column {col_name!r} to {target} (use Array dtype instead)"
            raise TypeError(msg) from None

    # A lone Array column is converted via its Series rather than the frame.
    single_array_column = df.width == 1 and schema.dtypes()[0] == Array
    if single_array_column:
        result = df[df.columns[0]].to_numpy(writable=writable)
    else:
        result = df.to_numpy(writable=writable, order=order)

    if result.dtype != object:
        return result

    msg = f"cannot convert DataFrame to {target} (mixed type columns result in `object` dtype)\n{schema!r}"
    raise TypeError(msg)
|
polars/plugins.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import sys
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
from polars._utils.parse import parse_into_list_of_expressions
|
|
10
|
+
from polars._utils.wrap import wrap_expr
|
|
11
|
+
|
|
12
|
+
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
13
|
+
import polars._plr as plr
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Iterable
|
|
17
|
+
|
|
18
|
+
from polars import Expr
|
|
19
|
+
from polars._typing import IntoExpr
|
|
20
|
+
|
|
21
|
+
__all__ = ["register_plugin_function"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def register_plugin_function(
    *,
    plugin_path: Path | str,
    function_name: str,
    args: IntoExpr | Iterable[IntoExpr],
    kwargs: dict[str, Any] | None = None,
    is_elementwise: bool = False,
    changes_length: bool = False,
    returns_scalar: bool = False,
    cast_to_supertype: bool = False,
    input_wildcard_expansion: bool = False,
    pass_name_to_apply: bool = False,
    use_abs_path: bool = False,
) -> Expr:
    """
    Register a plugin function.

    See the `user guide <https://docs.pola.rs/user-guide/plugins/expr_plugins>`_
    for more information about plugins.

    Parameters
    ----------
    plugin_path
        Location of the plugin package: either the dynamic library file itself,
        or the directory that contains it.
    function_name
        Name of the Rust function to register.
    args
        Arguments for the function. They are passed as the `input` argument on
        the Rust side and must be expressions (or be convertible to expressions).
    kwargs
        Non-expression arguments for the plugin function. These must be
        JSON serializable.
    is_elementwise
        Indicate that the function operates on scalars only. This will potentially
        trigger fast paths.
    changes_length
        Indicate that the function will change the length of the expression,
        as a `unique` or `slice` operation would.
    returns_scalar
        Automatically explode on unit length if the function ran as final aggregation.
        This is the case for aggregations like `sum`, `min`, `covariance` etc.
    cast_to_supertype
        Cast the input expressions to their supertype.
    input_wildcard_expansion
        Expand wildcard expressions before executing the function.
    pass_name_to_apply
        If set to `True`, the `Series` passed to the function in a group-by operation
        will ensure the name is set. This is an extra heap allocation per group.
    use_abs_path
        If set to `True`, the path will be resolved to an absolute path.
        The path to the dynamic library is relative to the virtual environment by
        default.

    Returns
    -------
    Expr

    Warnings
    --------
    This is highly unsafe as this will call the C function loaded by
    `plugin::function_name`.

    The parameters you set dictate how Polars will handle the function.
    Make sure they are correct!
    """
    # Prepare the three inputs in this order: expressions, kwargs, library path.
    input_exprs = parse_into_list_of_expressions(args)
    pickled_kwargs = _serialize_kwargs(kwargs)
    library_path = _resolve_plugin_path(plugin_path, use_abs_path=use_abs_path)

    pyexpr = plr.register_plugin_function(
        plugin_path=str(library_path),
        function_name=function_name,
        args=input_exprs,
        kwargs=pickled_kwargs,
        is_elementwise=is_elementwise,
        input_wildcard_expansion=input_wildcard_expansion,
        returns_scalar=returns_scalar,
        cast_to_supertype=cast_to_supertype,
        pass_name_to_apply=pass_name_to_apply,
        changes_length=changes_length,
    )
    return wrap_expr(pyexpr)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _serialize_kwargs(kwargs: dict[str, Any] | None) -> bytes:
    """Pickle the plugin function's keyword arguments (empty bytes if there are none)."""
    if kwargs:
        import pickle

        # Use the highest pickle protocol supported by the serde-pickle crate:
        # https://docs.rs/serde-pickle/latest/serde_pickle/
        return pickle.dumps(kwargs, protocol=5)

    # `None` and an empty dict both serialize to an empty payload.
    return b""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@lru_cache(maxsize=16)
def _resolve_plugin_path(path: Path | str, *, use_abs_path: bool = False) -> Path:
    """
    Locate the dynamic library file for a plugin.

    `path` may point at the library file itself, or at a directory; in the
    latter case the first directory entry recognised as a dynamic library
    is used. Raises `FileNotFoundError` if the directory holds no such file.
    """
    location = path if isinstance(path, Path) else Path(path)

    if location.is_file():
        return _resolve_file_path(location, use_abs_path=use_abs_path)

    # Take the first matching entry, in the order `iterdir` yields them.
    library = next(
        (entry for entry in location.iterdir() if _is_dynamic_lib(entry)),
        None,
    )
    if library is None:
        msg = f"no dynamic library found at path: {location}"
        raise FileNotFoundError(msg)

    return _resolve_file_path(library, use_abs_path=use_abs_path)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _is_dynamic_lib(path: Path) -> bool:
    """Return True if `path` is a file whose suffix is `.so`, `.dll` or `.pyd`."""
    if not path.is_file():
        return False
    return path.suffix in {".so", ".dll", ".pyd"}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _resolve_file_path(path: Path, *, use_abs_path: bool = False) -> Path:
    """
    Return the library path in the form handed to the plugin loader.

    With `use_abs_path` the path is resolved to an absolute path. Otherwise
    it is expressed relative to `sys.prefix`, falling back to the resolved
    absolute path when `path` does not sit under that prefix.
    """
    if use_abs_path:
        return path.resolve()

    try:
        return path.relative_to(Path(sys.prefix))
    except ValueError:
        # `path` is not located under the environment prefix.
        return path.resolve()
|
polars/py.typed
ADDED
|
File without changes
|
polars/pyproject.toml
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "polars"
|
|
3
|
+
description = "Blazingly fast DataFrame library"
|
|
4
|
+
readme = "README.md"
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Ritchie Vink", email = "ritchie46@gmail.com" },
|
|
7
|
+
]
|
|
8
|
+
version = "1.34.0b2"
|
|
9
|
+
license = { file = "LICENSE" }
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
|
|
12
|
+
keywords = ["dataframe", "arrow", "out-of-core"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 5 - Production/Stable",
|
|
15
|
+
"Environment :: Console",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Programming Language :: Rust",
|
|
28
|
+
"Topic :: Scientific/Engineering",
|
|
29
|
+
"Typing :: Typed",
|
|
30
|
+
]
|
|
31
|
+
dependencies = ["polars-runtime-32 == 1.34.0b2"]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://www.pola.rs/"
|
|
35
|
+
Documentation = "https://docs.pola.rs/api/python/stable/reference/index.html"
|
|
36
|
+
Repository = "https://github.com/pola-rs/polars"
|
|
37
|
+
Changelog = "https://github.com/pola-rs/polars/releases"
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
# Runtimes
|
|
41
|
+
rt64 = ["polars-runtime-64 == 1.34.0b2"]
|
|
42
|
+
rtcompat = ["polars-runtime-compat == 1.34.0b2"]
|
|
43
|
+
|
|
44
|
+
# NOTE: keep this list in sync with show_versions() and requirements-dev.txt
|
|
45
|
+
polars_cloud = ["polars_cloud >= 0.0.1a1"]
|
|
46
|
+
# Interop
|
|
47
|
+
numpy = ["numpy >= 1.16.0"]
|
|
48
|
+
pandas = ["pandas", "polars[pyarrow]"]
|
|
49
|
+
pyarrow = ["pyarrow >= 7.0.0"]
|
|
50
|
+
pydantic = ["pydantic"]
|
|
51
|
+
|
|
52
|
+
# Excel
|
|
53
|
+
calamine = ["fastexcel >= 0.9"]
|
|
54
|
+
openpyxl = ["openpyxl >= 3.0.0"]
|
|
55
|
+
xlsx2csv = ["xlsx2csv >= 0.8.0"]
|
|
56
|
+
xlsxwriter = ["xlsxwriter"]
|
|
57
|
+
excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"]
|
|
58
|
+
|
|
59
|
+
# Database
|
|
60
|
+
adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"]
|
|
61
|
+
connectorx = ["connectorx >= 0.3.2"]
|
|
62
|
+
sqlalchemy = ["sqlalchemy", "polars[pandas]"]
|
|
63
|
+
database = ["polars[adbc,connectorx,sqlalchemy]"]
|
|
64
|
+
|
|
65
|
+
# Cloud
|
|
66
|
+
fsspec = ["fsspec"]
|
|
67
|
+
|
|
68
|
+
# Other I/O
|
|
69
|
+
deltalake = ["deltalake >= 1.0.0"]
|
|
70
|
+
iceberg = ["pyiceberg >= 0.7.1"]
|
|
71
|
+
|
|
72
|
+
# Other
|
|
73
|
+
async = ["gevent"]
|
|
74
|
+
cloudpickle = ["cloudpickle"]
|
|
75
|
+
graph = ["matplotlib"]
|
|
76
|
+
plot = ["altair >= 5.4.0"]
|
|
77
|
+
style = ["great-tables >= 0.8.0"]
|
|
78
|
+
timezone = ["tzdata; platform_system == 'Windows'"]
|
|
79
|
+
|
|
80
|
+
# GPU Engine
|
|
81
|
+
gpu = ["cudf-polars-cu12"]
|
|
82
|
+
|
|
83
|
+
# All
|
|
84
|
+
all = [
|
|
85
|
+
"polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]",
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
[[tool.uv.index]]
|
|
89
|
+
name = "pytorch"
|
|
90
|
+
url = "https://download.pytorch.org/whl/cpu"
|
|
91
|
+
explicit = true
|
|
92
|
+
|
|
93
|
+
[tool.uv.sources]
|
|
94
|
+
torch = [
|
|
95
|
+
{ index = "pytorch" },
|
|
96
|
+
]
|