polars-runtime-compat 1.34.0b2__cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Originally vendored from https://github.com/erdewit/nest_asyncio
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
# BSD 2-Clause License
|
|
6
|
+
|
|
7
|
+
# Copyright (c) 2025 Ritchie Vink
|
|
8
|
+
# Copyright (c) 2018-2020, Ewald de Wit
|
|
9
|
+
# All rights reserved.
|
|
10
|
+
|
|
11
|
+
# Redistribution and use in source and binary forms, with or without
|
|
12
|
+
# modification, are permitted provided that the following conditions are met:
|
|
13
|
+
|
|
14
|
+
# * Redistributions of source code must retain the above copyright notice, this
|
|
15
|
+
# list of conditions and the following disclaimer.
|
|
16
|
+
|
|
17
|
+
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
18
|
+
# this list of conditions and the following disclaimer in the documentation
|
|
19
|
+
# and/or other materials provided with the distribution.
|
|
20
|
+
|
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
22
|
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23
|
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
24
|
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
25
|
+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
26
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
27
|
+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
28
|
+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
29
|
+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
31
|
+
|
|
32
|
+
# Ignore all lints, file is currently copied.
|
|
33
|
+
# ruff: noqa
|
|
34
|
+
# type: ignore
|
|
35
|
+
|
|
36
|
+
"""Patch asyncio to allow nested event loops."""
|
|
37
|
+
|
|
38
|
+
import asyncio
|
|
39
|
+
import asyncio.events as events
|
|
40
|
+
import os
|
|
41
|
+
import sys
|
|
42
|
+
import threading
|
|
43
|
+
from contextlib import contextmanager, suppress
|
|
44
|
+
from heapq import heappop
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def apply(loop=None):
    """Patch asyncio to make its event loop reentrant."""
    # Module/policy/tornado patches are idempotent and global; they are applied
    # before patching the concrete loop instance handed to (or created for) us.
    _patch_asyncio()
    _patch_policy()
    _patch_tornado()

    # No explicit loop given: patch whatever loop the (patched) policy returns.
    loop = loop or asyncio.get_event_loop()
    _patch_loop(loop)
|
57
|
+
def _patch_asyncio():
    """Patch asyncio module to use pure Python tasks and futures."""

    def run(main, *, debug=False):
        # Replacement for asyncio.run() that reuses the current loop instead
        # of creating (and closing) a fresh one, so it can be nested.
        loop = asyncio.get_event_loop()
        loop.set_debug(debug)
        task = asyncio.ensure_future(main)
        try:
            return loop.run_until_complete(task)
        finally:
            if not task.done():
                task.cancel()
                with suppress(asyncio.CancelledError):
                    loop.run_until_complete(task)

    def _get_event_loop(stacklevel=3):
        # Replacement for events.get_event_loop(): prefer the running loop,
        # otherwise fall back to the policy (never raises like 3.10+ does).
        loop = events._get_running_loop()
        if loop is None:
            loop = events.get_event_loop_policy().get_event_loop()
        return loop

    # Use module level _current_tasks, all_tasks and patch run method.
    if hasattr(asyncio, "_nest_patched"):
        return
    if sys.version_info >= (3, 6, 0):
        # Force the pure-Python Task/Future implementations; the C versions
        # cannot be re-entered the way this module requires.
        asyncio.Task = asyncio.tasks._CTask = asyncio.tasks.Task = asyncio.tasks._PyTask
        asyncio.Future = asyncio.futures._CFuture = asyncio.futures.Future = (
            asyncio.futures._PyFuture
        )
    if sys.version_info < (3, 7, 0):
        asyncio.tasks._current_tasks = asyncio.tasks.Task._current_tasks
        asyncio.all_tasks = asyncio.tasks.Task.all_tasks
    if sys.version_info >= (3, 9, 0):
        events._get_event_loop = events.get_event_loop = asyncio.get_event_loop = (
            _get_event_loop
        )
    asyncio.run = run
    # Marker so repeated calls are no-ops.
    asyncio._nest_patched = True
|
|
96
|
+
|
|
97
|
+
def _patch_policy():
    """Patch the policy to always return a patched loop."""

    def get_event_loop(self):
        # Lazily create, patch and register a loop on first access so that
        # any loop handed out by the policy is already reentrant.
        if self._local._loop is None:
            loop = self.new_event_loop()
            _patch_loop(loop)
            self.set_event_loop(loop)
        return self._local._loop

    # Patch the policy *class*, so every instance of the active policy class
    # returns patched loops from now on.
    policy = events.get_event_loop_policy()
    policy.__class__.get_event_loop = get_event_loop
|
111
|
+
def _patch_loop(loop):
    """Patch loop to make it reentrant."""

    def run_forever(self):
        with manage_run(self), manage_asyncgens(self):
            while True:
                self._run_once()
                if self._stopping:
                    break
        self._stopping = False

    def run_until_complete(self, future):
        with manage_run(self):
            f = asyncio.ensure_future(future, loop=self)
            if f is not future:
                f._log_destroy_pending = False
            while not f.done():
                self._run_once()
                if self._stopping:
                    break
            if not f.done():
                raise RuntimeError("Event loop stopped before Future completed.")
            return f.result()

    def _run_once(self):
        """
        Simplified re-implementation of asyncio's _run_once that
        runs handles as they become ready.
        """
        ready = self._ready
        scheduled = self._scheduled
        # Drop cancelled timers from the head of the heap.
        while scheduled and scheduled[0]._cancelled:
            heappop(scheduled)

        # 0 = poll only; otherwise wait until the next timer (capped at 1 day),
        # or block indefinitely when nothing is scheduled.
        timeout = (
            0
            if ready or self._stopping
            else min(max(scheduled[0]._when - self.time(), 0), 86400)
            if scheduled
            else None
        )
        event_list = self._selector.select(timeout)
        self._process_events(event_list)

        # Move timers that are due (within clock resolution) to the ready queue.
        end_time = self.time() + self._clock_resolution
        while scheduled and scheduled[0]._when < end_time:
            handle = heappop(scheduled)
            ready.append(handle)

        # Only run the handles that were ready at the start of this pass.
        for _ in range(len(ready)):
            if not ready:
                break
            handle = ready.popleft()
            if not handle._cancelled:
                # preempt the current task so that that checks in
                # Task.__step do not raise
                curr_task = curr_tasks.pop(self, None)

                try:
                    handle._run()
                finally:
                    # restore the current task
                    if curr_task is not None:
                        curr_tasks[self] = curr_task

        handle = None

    @contextmanager
    def manage_run(self):
        """Set up the loop for running."""
        # Save/restore thread id and the running loop so nested runs unwind
        # cleanly to the outer run's state.
        self._check_closed()
        old_thread_id = self._thread_id
        old_running_loop = events._get_running_loop()
        try:
            self._thread_id = threading.get_ident()
            events._set_running_loop(self)
            self._num_runs_pending += 1
            if self._is_proactorloop:
                if self._self_reading_future is None:
                    self.call_soon(self._loop_self_reading)
            yield
        finally:
            self._thread_id = old_thread_id
            events._set_running_loop(old_running_loop)
            self._num_runs_pending -= 1
            if self._is_proactorloop:
                # Last nested run on a proactor loop: tear down the
                # self-reading future and unregister its overlapped object.
                if (
                    self._num_runs_pending == 0
                    and self._self_reading_future is not None
                ):
                    ov = self._self_reading_future._ov
                    self._self_reading_future.cancel()
                    if ov is not None:
                        self._proactor._unregister(ov)
                    self._self_reading_future = None

    @contextmanager
    def manage_asyncgens(self):
        # Install this loop's async-generator hooks for the duration of a run.
        if not hasattr(sys, "get_asyncgen_hooks"):
            # Python version is too old.
            return
        old_agen_hooks = sys.get_asyncgen_hooks()
        try:
            self._set_coroutine_origin_tracking(self._debug)
            if self._asyncgens is not None:
                sys.set_asyncgen_hooks(
                    firstiter=self._asyncgen_firstiter_hook,
                    finalizer=self._asyncgen_finalizer_hook,
                )
            yield
        finally:
            self._set_coroutine_origin_tracking(False)
            if self._asyncgens is not None:
                sys.set_asyncgen_hooks(*old_agen_hooks)

    def _check_running(self):
        """Do not throw exception if loop is already running."""
        pass

    if hasattr(loop, "_nest_patched"):
        return
    if not isinstance(loop, asyncio.BaseEventLoop):
        raise ValueError("Can't patch loop of type %s" % type(loop))
    # Patch the loop's class so all loops of this type become reentrant.
    cls = loop.__class__
    cls.run_forever = run_forever
    cls.run_until_complete = run_until_complete
    cls._run_once = _run_once
    cls._check_running = _check_running
    cls._check_runnung = _check_running  # typo in Python 3.7 source
    cls._num_runs_pending = 1 if loop.is_running() else 0
    cls._is_proactorloop = os.name == "nt" and issubclass(
        cls, asyncio.ProactorEventLoop
    )
    if sys.version_info < (3, 7, 0):
        cls._set_coroutine_origin_tracking = cls._set_coroutine_wrapper
    curr_tasks = (
        asyncio.tasks._current_tasks
        if sys.version_info >= (3, 7, 0)
        else asyncio.Task._current_tasks
    )
    cls._nest_patched = True
254
|
+
def _patch_tornado():
    """
    If tornado is imported before nest_asyncio, make tornado aware of
    the pure-Python asyncio Future.
    """
    # Only act when tornado is already imported; we never import it ourselves.
    if "tornado" in sys.modules:
        import tornado.concurrent as tc  # type: ignore

        tc.Future = asyncio.Future
        if asyncio.Future not in tc.FUTURES:
            tc.FUTURES += (asyncio.Future,)
polars/_utils/parquet.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Any, Callable
|
|
2
|
+
|
|
3
|
+
from polars._typing import ParquetMetadataContext, ParquetMetadataFn
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def wrap_parquet_metadata_callback(
    fn: ParquetMetadataFn,
) -> Callable[[Any], list[tuple[str, str]]]:
    """
    Adapt a user-supplied Parquet metadata callback for the native layer.

    The returned callable accepts the raw context object coming from pyo3,
    rebuilds a Python-level `ParquetMetadataContext` from it, invokes `fn`,
    and flattens the resulting mapping into `(key, value)` pairs.
    """

    def pyo3_compatible_callback(ctx: Any) -> list[tuple[str, str]]:
        # Re-wrap the native context so the user callback sees a typed object.
        wrapped = ParquetMetadataContext(
            arrow_schema=ctx.arrow_schema,
        )
        metadata = fn(wrapped)
        return list(metadata.items())

    return pyo3_compatible_callback
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Re-export the expression-parsing helpers as the package's public surface.
from polars._utils.parse.expr import (
    parse_into_expression,
    parse_into_list_of_expressions,
    parse_predicates_constraints_into_expression,
)

__all__ = [
    # expr
    "parse_into_expression",
    "parse_into_list_of_expressions",
    "parse_predicates_constraints_into_expression",
]
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from collections.abc import Collection, Iterable, Mapping
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import polars._reexport as pl
|
|
8
|
+
from polars import functions as F
|
|
9
|
+
from polars._utils.various import qualified_type_name
|
|
10
|
+
from polars.exceptions import ComputeError
|
|
11
|
+
|
|
12
|
+
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
13
|
+
import polars._plr as plr
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from polars import Expr
|
|
17
|
+
from polars._plr import PyExpr
|
|
18
|
+
from polars._typing import ColumnNameOrSelector, IntoExpr, PolarsDataType
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def parse_into_expression(
    input: IntoExpr,
    *,
    str_as_lit: bool = False,
    list_as_series: bool = False,
    structify: bool = False,
    dtype: PolarsDataType | None = None,
) -> PyExpr:
    """
    Parse a single input into an expression.

    Parameters
    ----------
    input
        The input to be parsed as an expression.
    str_as_lit
        Interpret string input as a string literal. If set to `False` (default),
        strings are parsed as column names.
    list_as_series
        Interpret list input as a Series literal. If set to `False` (default),
        lists are parsed as list literals.
    structify
        Convert multi-column expressions to a single struct expression.
    dtype
        If the input is expected to resolve to a literal with a known dtype, pass
        this to the `lit` constructor.

    Returns
    -------
    PyExpr
    """
    if isinstance(input, pl.Expr):
        parsed = _structify_expression(input) if structify else input
    elif isinstance(input, str) and not str_as_lit:
        parsed = F.col(input)
    elif list_as_series and isinstance(input, list):
        parsed = F.lit(pl.Series(input), dtype=dtype)
    else:
        parsed = F.lit(input, dtype=dtype)
    return parsed._pyexpr
|
|
66
|
+
|
|
67
|
+
def _structify_expression(expr: Expr) -> Expr:
    """Wrap a multi-output expression in a struct, keeping its alias if any."""
    stripped = expr.meta.undo_aliases()
    if not stripped.meta.has_multiple_outputs():
        # Single-output expressions pass through untouched.
        return expr
    try:
        name = expr.meta.output_name()
    except ComputeError:
        # No resolvable output name: struct the aliased expression as-is.
        return F.struct(expr)
    return F.struct(stripped).alias(name)
+
|
|
78
|
+
|
|
79
|
+
def parse_into_list_of_expressions(
    *inputs: IntoExpr | Iterable[IntoExpr],
    __structify: bool = False,
    **named_inputs: IntoExpr,
) -> list[PyExpr]:
    """
    Parse multiple inputs into a list of expressions.

    Parameters
    ----------
    *inputs
        Inputs to be parsed as expressions, specified as positional arguments.
    **named_inputs
        Additional inputs to be parsed as expressions, specified as keyword arguments.
        The expressions will be renamed to the keyword used.
    __structify
        Convert multi-column expressions to a single struct expression.

    Returns
    -------
    list of PyExpr
    """
    result = _parse_positional_inputs(inputs, structify=__structify)  # type: ignore[arg-type]
    # Keyword inputs come after positional ones, each aliased to its keyword.
    result.extend(_parse_named_inputs(named_inputs, structify=__structify))
    return result
+
|
|
108
|
+
|
|
109
|
+
def parse_into_selector(
    i: ColumnNameOrSelector,
    *,
    strict: bool = True,
) -> pl.Selector:
    """
    Convert a column name, selector or expression into a selector.

    Raises TypeError for any other input type.
    """
    if isinstance(i, pl.Selector):
        return i
    if isinstance(i, str):
        import polars.selectors as cs

        return cs.by_name([i], require_all=strict)
    if isinstance(i, pl.Expr):
        return i.meta.as_selector()
    msg = f"cannot turn {qualified_type_name(i)!r} into selector"
    raise TypeError(msg)
+
|
|
126
|
+
|
|
127
|
+
def parse_list_into_selector(
    inputs: ColumnNameOrSelector | Collection[ColumnNameOrSelector],
    *,
    strict: bool = True,
) -> pl.Selector:
    """
    Combine one input or a collection of inputs into a single selector.

    Plain column names are batched through `cs.by_name`; everything else is
    OR-combined via `parse_into_selector`.
    """
    # A bare string or a non-collection input is handled as a single item.
    if isinstance(inputs, str) or not isinstance(inputs, Collection):
        return parse_into_selector(inputs, strict=strict)

    import polars.selectors as cs

    names = [item for item in inputs if isinstance(item, str)]
    combined = cs.by_name(names, require_all=strict)  # type: ignore[arg-type]

    if len(names) == len(inputs):
        # Everything was a plain name: one by_name selector covers it all.
        return combined

    # A bit cleaner
    if not names:
        combined = cs.empty()

    for item in inputs:
        combined |= parse_into_selector(item, strict=strict)
    return combined
|
|
151
|
+
|
|
152
|
+
def _parse_positional_inputs(
    inputs: tuple[IntoExpr, ...] | tuple[Iterable[IntoExpr]],
    *,
    structify: bool = False,
) -> list[PyExpr]:
    """Parse the positional argument tuple into a list of PyExpr."""
    return [
        parse_into_expression(item, structify=structify)
        for item in _parse_inputs_as_iterable(inputs)
    ]
|
|
160
|
+
|
|
161
|
+
def _parse_inputs_as_iterable(
|
|
162
|
+
inputs: tuple[Any, ...] | tuple[Iterable[Any]],
|
|
163
|
+
) -> Iterable[Any]:
|
|
164
|
+
if not inputs:
|
|
165
|
+
return []
|
|
166
|
+
|
|
167
|
+
# Ensures that the outermost element cannot be a Dictionary (as an iterable)
|
|
168
|
+
if len(inputs) == 1 and isinstance(inputs[0], Mapping):
|
|
169
|
+
msg = (
|
|
170
|
+
"Cannot pass a dictionary as a single positional argument.\n"
|
|
171
|
+
"If you merely want the *keys*, use:\n"
|
|
172
|
+
" • df.method(*your_dict.keys())\n"
|
|
173
|
+
"If you need the key value pairs, use one of:\n"
|
|
174
|
+
" • unpack as keywords: df.method(**your_dict)\n"
|
|
175
|
+
" • build expressions: df.method(expr.alias(k) for k, expr in your_dict.items())"
|
|
176
|
+
)
|
|
177
|
+
raise TypeError(msg)
|
|
178
|
+
|
|
179
|
+
# Treat elements of a single iterable as separate inputs
|
|
180
|
+
if len(inputs) == 1 and _is_iterable(inputs[0]):
|
|
181
|
+
return inputs[0]
|
|
182
|
+
|
|
183
|
+
return inputs
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _is_iterable(input: Any | Iterable[Any]) -> bool:
    """Return True for iterables that should be unpacked (not str/bytes/Series)."""
    if isinstance(input, (str, bytes, pl.Series)):
        # These are iterable but semantically a single value.
        return False
    return isinstance(input, Iterable)
+
|
|
191
|
+
|
|
192
|
+
def _parse_named_inputs(
    named_inputs: dict[str, IntoExpr], *, structify: bool = False
) -> Iterable[PyExpr]:
    """Parse keyword inputs lazily, aliasing each expression to its keyword."""
    return (
        parse_into_expression(value, structify=structify).alias(name)
        for name, value in named_inputs.items()
    )
+
|
|
198
|
+
|
|
199
|
+
def parse_predicates_constraints_into_expression(
    *predicates: IntoExpr | Iterable[IntoExpr],
    **constraints: Any,
) -> PyExpr:
    """
    Parse predicates and constraints into a single expression.

    The result is an AND-reduction of all inputs.

    Parameters
    ----------
    *predicates
        Predicates to be parsed, specified as positional arguments.
    **constraints
        Constraints to be parsed, specified as keyword arguments.
        These will be converted to predicates of the form "keyword equals input value".

    Returns
    -------
    PyExpr
    """
    combined = _parse_positional_inputs(predicates)  # type: ignore[arg-type]
    # Keyword constraints become `col(name) == value` predicates.
    combined.extend(_parse_constraints(constraints))
    return _combine_predicates(combined)
+
|
|
228
|
+
|
|
229
|
+
def _parse_constraints(constraints: dict[str, IntoExpr]) -> Iterable[PyExpr]:
    """Turn keyword constraints into lazy `column == value` predicates."""
    return (F.col(key).eq(value)._pyexpr for key, value in constraints.items())
|
|
233
|
+
|
|
234
|
+
def _combine_predicates(predicates: list[PyExpr]) -> PyExpr:
|
|
235
|
+
if not predicates:
|
|
236
|
+
msg = "at least one predicate or constraint must be provided"
|
|
237
|
+
raise TypeError(msg)
|
|
238
|
+
|
|
239
|
+
if len(predicates) == 1:
|
|
240
|
+
return predicates[0]
|
|
241
|
+
|
|
242
|
+
return plr.all_horizontal(predicates)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
try:
    # The compiled extension module is the source of truth for the version.
    import polars._plr as plr

    _POLARS_VERSION = plr.__version__
except ImportError:
    # This is only useful for documentation
    import warnings

    warnings.warn("Polars binary is missing!", stacklevel=2)
    _POLARS_VERSION = ""
12
|
+
|
|
13
|
+
def get_polars_version() -> str:
    """
    Return the version of the Python Polars package as a string.

    If the Polars binary is missing, returns an empty string.
    """
    # `_POLARS_VERSION` is resolved once at import time in this module.
    return _POLARS_VERSION
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
from polars._utils.construction.dataframe import dataframe_to_pydf
|
|
7
|
+
from polars._utils.wrap import wrap_df, wrap_s
|
|
8
|
+
|
|
9
|
+
with contextlib.suppress(ImportError):
|
|
10
|
+
from polars._plr import PySeries
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from polars import DataFrame
|
|
14
|
+
from polars._typing import SchemaDefinition, SchemaDict
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def is_pycapsule(obj: Any) -> bool:
    """Check if object supports the PyCapsule interface."""
    # Either Arrow C export hook (stream or array) qualifies.
    capsule_hooks = ("__arrow_c_stream__", "__arrow_c_array__")
    return any(hasattr(obj, hook) for hook in capsule_hooks)
|
+
|
|
21
|
+
|
|
22
|
+
def pycapsule_to_frame(
    obj: Any,
    *,
    schema: SchemaDefinition | None = None,
    schema_overrides: SchemaDict | None = None,
    rechunk: bool = False,
) -> DataFrame:
    """Convert PyCapsule object to DataFrame."""
    # Both import paths produce a struct-typed Series which is then unnested
    # into a DataFrame; the array hook is preferred over the stream hook.
    if hasattr(obj, "__arrow_c_array__"):
        pyseries = PySeries.from_arrow_c_array(obj)
    elif hasattr(obj, "__arrow_c_stream__"):
        pyseries = PySeries.from_arrow_c_stream(obj)
    else:
        msg = f"object does not support PyCapsule interface; found {obj!r} "
        raise TypeError(msg)

    placeholder = ""
    df = wrap_s(pyseries).to_frame(placeholder).unnest(placeholder)

    if rechunk:
        df = df.rechunk()
    if schema or schema_overrides:
        # Re-run DataFrame construction to apply the requested schema.
        df = wrap_df(
            dataframe_to_pydf(df, schema=schema, schema_overrides=schema_overrides)
        )
    return df
|
polars/_utils/scan.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from polars import DataFrame
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _execute_from_rust(
|
|
10
|
+
function: Any, with_columns: list[str] | None, *args: Any
|
|
11
|
+
) -> DataFrame:
|
|
12
|
+
"""
|
|
13
|
+
Deserialize and execute the given function for the projected columns.
|
|
14
|
+
|
|
15
|
+
Called from polars-lazy. Polars-lazy provides the bytes of the pickled function and
|
|
16
|
+
the projected columns.
|
|
17
|
+
|
|
18
|
+
Parameters
|
|
19
|
+
----------
|
|
20
|
+
function
|
|
21
|
+
function object
|
|
22
|
+
with_columns
|
|
23
|
+
Columns that are projected
|
|
24
|
+
*args
|
|
25
|
+
Additional function arguments.
|
|
26
|
+
"""
|
|
27
|
+
return function(with_columns, *args)
|
polars/_utils/serde.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Utility for serializing Polars objects."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from io import BytesIO, StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Callable, Literal, overload
|
|
8
|
+
|
|
9
|
+
from polars._utils.various import normalize_filepath
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from io import IOBase
|
|
13
|
+
|
|
14
|
+
from polars._typing import SerializationFormat
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@overload
def serialize_polars_object(
    serializer: Callable[[IOBase | str], None], file: None, format: Literal["binary"]
) -> bytes: ...
@overload
def serialize_polars_object(
    serializer: Callable[[IOBase | str], None], file: None, format: Literal["json"]
) -> str: ...
@overload
def serialize_polars_object(
    serializer: Callable[[IOBase | str], None],
    file: IOBase | str | Path,
    format: SerializationFormat,
) -> None: ...


def serialize_polars_object(
    serializer: Callable[[IOBase | str], None],
    file: IOBase | str | Path | None,
    format: SerializationFormat,
) -> bytes | str | None:
    """Serialize a Polars object (DataFrame/LazyFrame/Expr)."""

    def _run_to_bytes() -> bytes:
        # Capture the serializer's output in an in-memory binary buffer.
        buffer = BytesIO()
        try:
            serializer(buffer)
            return buffer.getvalue()
        finally:
            buffer.close()

    if file is None:
        # No target: return the serialized payload (decoded for JSON).
        raw = _run_to_bytes()
        return raw.decode() if format == "json" else raw

    if isinstance(file, StringIO):
        # Text buffer: decode the binary payload before writing.
        file.write(_run_to_bytes().decode())
    elif isinstance(file, BytesIO):
        file.write(_run_to_bytes())
    elif isinstance(file, (str, Path)):
        # Path-like target: let the serializer write the file itself.
        serializer(normalize_filepath(file))
    else:
        # Any other IO object is passed straight through.
        serializer(file)
    return None
|