polars-runtime-compat 1.34.0b3__cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b5__cp39-abi3-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/sql/context.py
DELETED
|
@@ -1,677 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import contextlib
|
|
4
|
-
import re
|
|
5
|
-
from typing import (
|
|
6
|
-
TYPE_CHECKING,
|
|
7
|
-
Callable,
|
|
8
|
-
Generic,
|
|
9
|
-
Union,
|
|
10
|
-
overload,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
from polars._dependencies import _check_for_pandas, _check_for_pyarrow
|
|
14
|
-
from polars._dependencies import pandas as pd
|
|
15
|
-
from polars._dependencies import pyarrow as pa
|
|
16
|
-
from polars._typing import FrameType
|
|
17
|
-
from polars._utils.deprecation import deprecate_renamed_parameter
|
|
18
|
-
from polars._utils.pycapsule import is_pycapsule
|
|
19
|
-
from polars._utils.unstable import issue_unstable_warning
|
|
20
|
-
from polars._utils.various import _get_stack_locals, qualified_type_name
|
|
21
|
-
from polars._utils.wrap import wrap_ldf
|
|
22
|
-
from polars.convert import from_arrow, from_pandas
|
|
23
|
-
from polars.dataframe import DataFrame
|
|
24
|
-
from polars.lazyframe import LazyFrame
|
|
25
|
-
from polars.series import Series
|
|
26
|
-
|
|
27
|
-
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
28
|
-
from polars._plr import PySQLContext
|
|
29
|
-
|
|
30
|
-
if TYPE_CHECKING:
|
|
31
|
-
import sys
|
|
32
|
-
from collections.abc import Collection, Mapping
|
|
33
|
-
from types import TracebackType
|
|
34
|
-
from typing import Any, Final, Literal
|
|
35
|
-
|
|
36
|
-
if sys.version_info >= (3, 10):
|
|
37
|
-
from typing import TypeAlias
|
|
38
|
-
else:
|
|
39
|
-
from typing_extensions import TypeAlias
|
|
40
|
-
|
|
41
|
-
if sys.version_info >= (3, 11):
|
|
42
|
-
from typing import Self
|
|
43
|
-
else:
|
|
44
|
-
from typing_extensions import Self
|
|
45
|
-
|
|
46
|
-
CompatibleFrameType: TypeAlias = Union[
|
|
47
|
-
DataFrame,
|
|
48
|
-
LazyFrame,
|
|
49
|
-
Series,
|
|
50
|
-
pd.DataFrame,
|
|
51
|
-
pd.Series[Any],
|
|
52
|
-
pa.Table,
|
|
53
|
-
pa.RecordBatch,
|
|
54
|
-
]
|
|
55
|
-
|
|
56
|
-
__all__ = ["SQLContext"]
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _compatible_frame(obj: Any) -> bool:
    """
    Check if the object can be converted to DataFrame.

    An object qualifies when it passes the `is_pycapsule` check, is a Polars
    `LazyFrame`, is a pandas `DataFrame`/`Series`, or is a pyarrow
    `Table`/`RecordBatch`.
    """
    # cheapest checks first; each guard short-circuits exactly like the
    # corresponding term of a single `or` chain would
    if is_pycapsule(obj) or isinstance(obj, LazyFrame):
        return True
    if _check_for_pandas(obj) and isinstance(obj, (pd.DataFrame, pd.Series)):
        return True
    return _check_for_pyarrow(obj) and isinstance(obj, (pa.Table, pa.RecordBatch))
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def _ensure_lazyframe(obj: Any) -> LazyFrame:
    """
    Return LazyFrame from compatible input.

    Parameters
    ----------
    obj
        A Polars `DataFrame`, `LazyFrame` or `Series`, a pandas
        `DataFrame`/`Series`, a pyarrow `Table`/`RecordBatch`, or any object
        that passes the `is_pycapsule` check.

    Returns
    -------
    LazyFrame

    Raises
    ------
    ValueError
        If `obj` is none of the recognised frame types.
    """
    # native Polars objects need no conversion
    if isinstance(obj, (DataFrame, LazyFrame)):
        return obj.lazy()
    if isinstance(obj, Series):
        return obj.to_frame().lazy()

    if _check_for_pandas(obj) and isinstance(obj, (pd.DataFrame, pd.Series)):
        frame = from_pandas(obj)
    elif is_pycapsule(obj) or (
        _check_for_pyarrow(obj) and isinstance(obj, (pa.Table, pa.RecordBatch))
    ):
        frame = from_arrow(obj)
    else:
        msg = f"unrecognised frame type: {qualified_type_name(obj)}"
        raise ValueError(msg)

    # both converters may hand back a Series (e.g. for one-dimensional input);
    # a Series has no `.lazy()`, so promote it to a single-column frame first
    if isinstance(frame, Series):
        frame = frame.to_frame()
    return frame.lazy()
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _get_frame_locals(
    *,
    all_compatible: bool,
    n_objects: int | None = None,
    named: str | Collection[str] | Callable[[str], bool] | None = None,
) -> dict[str, Any]:
    """
    Return compatible frame objects from the local stack.

    Parameters
    ----------
    all_compatible
        If True, match objects using `_compatible_frame`; otherwise match only
        Polars `DataFrame`, `LazyFrame` and `Series` instances.
    n_objects
        Forwarded unchanged to `_get_stack_locals`.
    named
        Forwarded unchanged to `_get_stack_locals`.
    """
    if all_compatible:
        of_type = _compatible_frame
    else:
        of_type = (DataFrame, LazyFrame, Series)
    return _get_stack_locals(named=named, n_objects=n_objects, of_type=of_type)  # type: ignore[arg-type]
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
class SQLContext(Generic[FrameType]):
|
|
100
|
-
"""
|
|
101
|
-
Run SQL queries against DataFrame, LazyFrame, and Series data.
|
|
102
|
-
|
|
103
|
-
.. warning::
|
|
104
|
-
This functionality is considered **unstable**, although it is close to being
|
|
105
|
-
considered stable. It may be changed at any point without it being considered
|
|
106
|
-
a breaking change.
|
|
107
|
-
"""
|
|
108
|
-
|
|
109
|
-
_ctxt: PySQLContext
|
|
110
|
-
_eager_execution: Final[bool]
|
|
111
|
-
_tables_scope_stack: list[set[str]]
|
|
112
|
-
|
|
113
|
-
# note: the type-overloaded methods are required to support accurate typing
|
|
114
|
-
# of the frame return from "execute" (which may be DataFrame or LazyFrame),
|
|
115
|
-
# as that is influenced by both the "eager" flag at init-time AND the "eager"
|
|
116
|
-
# flag at query-time (if anyone can find a lighter-weight set of annotations
|
|
117
|
-
# that successfully resolves this, please go for it... ;)
|
|
118
|
-
|
|
119
|
-
@overload
def __init__(
    self: SQLContext[LazyFrame],
    frames: Mapping[str, CompatibleFrameType | None] | None = ...,
    *,
    register_globals: bool | int = ...,
    all_compatible: bool = ...,
    eager: Literal[False] = False,
    **named_frames: CompatibleFrameType | None,
) -> None: ...

@overload
def __init__(
    self: SQLContext[DataFrame],
    frames: Mapping[str, CompatibleFrameType | None] | None = ...,
    *,
    register_globals: bool | int = ...,
    all_compatible: bool = ...,
    eager: Literal[True],
    **named_frames: CompatibleFrameType | None,
) -> None: ...

@overload
def __init__(
    self: SQLContext[DataFrame],
    frames: Mapping[str, CompatibleFrameType | None] | None = ...,
    *,
    register_globals: bool | int = ...,
    all_compatible: bool = ...,
    eager: bool,
    **named_frames: CompatibleFrameType | None,
) -> None: ...

@deprecate_renamed_parameter("eager_execution", "eager", version="0.20.31")
def __init__(
    self,
    frames: Mapping[str, CompatibleFrameType | None] | None = None,
    *,
    register_globals: bool | int = False,
    all_compatible: bool = False,
    eager: bool = False,
    **named_frames: CompatibleFrameType | None,
) -> None:
    """
    Initialize a new `SQLContext`.

    .. versionchanged:: 0.20.31
        The `eager_execution` parameter was renamed `eager`.

    Parameters
    ----------
    frames
        A `{name:frame, ...}` mapping which can include Polars frames *and*
        pandas DataFrames, Series and pyarrow Table and RecordBatch objects.
    register_globals
        Register compatible objects (polars DataFrame, LazyFrame, and Series) found
        in the globals, automatically mapping their variable name to a table name.
        To register other objects (pandas/pyarrow data) pass them explicitly, set
        `all_compatible`, or call the `execute_global` classmethod. If given an
        integer then only the most recent "n" objects found will be registered.
    all_compatible
        Only consulted when `register_globals` is set. If True, objects accepted
        by the module's compatibility check (pandas / pyarrow data) are picked up
        from the globals as well; if False (the default) only Polars classes are.
    eager
        If True, returns execution results as `DataFrame` instead of `LazyFrame`.
        (Note that the query itself is always executed in lazy-mode; this parameter
        impacts whether :meth:`execute` returns an eager or lazy result frame).
    **named_frames
        Named eager/lazy frames, provided as kwargs.

    Examples
    --------
    >>> lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["x", None, "z"]})
    >>> res = pl.SQLContext(frame=lf).execute(
    ...     "SELECT b, a*2 AS two_a FROM frame WHERE b IS NOT NULL"
    ... )
    >>> res.collect()
    shape: (2, 2)
    ┌─────┬───────┐
    │ b   ┆ two_a │
    │ --- ┆ ---   │
    │ str ┆ i64   │
    ╞═════╪═══════╡
    │ x   ┆ 2     │
    │ z   ┆ 6     │
    └─────┴───────┘
    """
    issue_unstable_warning(
        "`SQLContext` is considered **unstable**, although it is close to being considered stable."
    )
    self._ctxt = PySQLContext.new()
    self._eager_execution = eager

    frames = dict(frames or {})
    if register_globals:
        # `True` means "no limit"; any other truthy integer is the documented
        # cap on how many of the most recent objects get registered
        n_objects = None if isinstance(register_globals, bool) else register_globals
        for name, obj in _get_frame_locals(
            all_compatible=all_compatible,
            n_objects=n_objects,
        ).items():
            # explicitly supplied frames always win over discovered globals
            if name not in frames and name not in named_frames:
                named_frames[name] = obj

    if frames or named_frames:
        frames.update(named_frames)
        self.register_many(frames)
|
|
219
|
-
|
|
220
|
-
@overload
@classmethod
def execute_global(
    cls, query: str, *, eager: Literal[False] = False
) -> LazyFrame: ...

@overload
@classmethod
def execute_global(cls, query: str, *, eager: Literal[True]) -> DataFrame: ...

@overload
@classmethod
def execute_global(cls, query: str, *, eager: bool) -> DataFrame | LazyFrame: ...

@classmethod
def execute_global(
    cls, query: str, *, eager: bool = False
) -> DataFrame | LazyFrame:
    """
    Immediately execute a SQL query, automatically registering frame globals.

    Notes
    -----
    * This convenience method automatically registers all compatible objects in
      the local stack that are referenced in the query, mapping their variable name
      to a table name. Note that in addition to polars DataFrame, LazyFrame, and
      Series this method *also* registers pandas DataFrame, Series, and pyarrow
      Table and RecordBatch objects.
    * Instead of calling this classmethod you should consider using `pl.sql`,
      which will use this code internally.

    Parameters
    ----------
    query
        A valid SQL query string.
    eager
        If True, returns execution results as `DataFrame` instead of `LazyFrame`.
        (Note that the query itself is always executed in lazy-mode).

    Examples
    --------
    >>> import pandas as pd
    >>> df = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    >>> df_pandas = pd.DataFrame({"a": [2, 3, 4], "c": [7, 8, 9]})

    Join a polars LazyFrame with a pandas DataFrame (note use of the preferred
    `pl.sql` method, which is equivalent to `SQLContext.execute_global`):

    >>> pl.sql("SELECT df.*, c FROM df JOIN df_pandas USING(a)").collect()
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 2   ┆ 5   ┆ 7   │
    │ 3   ┆ 6   ┆ 8   │
    └─────┴─────┴─────┘
    """
    # Rough extraction of candidate table names: everything after the first
    # FROM keyword is tokenised, and identifier-like tokens are kept. This keeps
    # us from registering unrelated objects from the globals (ideally we should
    # look to make the underlying `sqlparser-rs` lib parse the query to
    # identify table names).
    _, *after_from = re.split(r"\bFROM\b", query, maxsplit=1, flags=re.I)
    possible_names: set[str] = set()
    if after_from:
        for token in re.split(r"\b", after_from[0]):
            if re.match(r'^("[^"]+")$', token) or token.isidentifier():
                possible_names.add(token.strip('"'))

    # collect compatible frame objects from the stack, limited to those names
    named_frames = _get_frame_locals(all_compatible=True, named=possible_names)
    with cls(frames=named_frames, register_globals=False) as ctx:
        return ctx.execute(query=query, eager=eager)
|
|
296
|
-
|
|
297
|
-
def __enter__(self) -> SQLContext[FrameType]:
    """
    Track currently registered tables on scope entry; supports nested scopes.

    A snapshot of the current table names is pushed onto a per-instance stack,
    which is created on first use.
    """
    if not hasattr(self, "_tables_scope_stack"):
        self._tables_scope_stack = []
    tables_on_entry = set(self.tables())
    self._tables_scope_stack.append(tables_on_entry)
    return self
|
|
302
|
-
|
|
303
|
-
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """
    Unregister any tables created within the given scope on context exit.

    The exception arguments are not inspected.

    See Also
    --------
    unregister
    """
    tables_now = set(self.tables())
    tables_on_entry = self._tables_scope_stack.pop()
    # whatever is registered now but was absent on entry belongs to this scope
    self.unregister(names=tables_now - tables_on_entry)
|
|
319
|
-
|
|
320
|
-
def __repr__(self) -> str:
    """Return a short summary: the registered table count and the object id (hex)."""
    registered = self.tables()
    n_tables = len(registered)
    return f"<SQLContext [tables:{n_tables}] at 0x{id(self):x}>"
|
|
323
|
-
|
|
324
|
-
# these overloads are necessary to cover the possible permutations
|
|
325
|
-
# of the init-time "eager" param, and the local "eager" param.
|
|
326
|
-
|
|
327
|
-
@overload
def execute(
    self: SQLContext[DataFrame], query: str, *, eager: None = ...
) -> DataFrame: ...

@overload
def execute(
    self: SQLContext[DataFrame], query: str, *, eager: Literal[False]
) -> LazyFrame: ...

@overload
def execute(
    self: SQLContext[DataFrame], query: str, *, eager: Literal[True]
) -> DataFrame: ...

@overload
def execute(
    self: SQLContext[LazyFrame], query: str, *, eager: None = ...
) -> LazyFrame: ...

@overload
def execute(
    self: SQLContext[LazyFrame], query: str, *, eager: Literal[False]
) -> LazyFrame: ...

@overload
def execute(
    self: SQLContext[LazyFrame], query: str, *, eager: Literal[True]
) -> DataFrame: ...

@overload
def execute(
    self, query: str, *, eager: bool | None = ...
) -> LazyFrame | DataFrame: ...

def execute(
    self, query: str, *, eager: bool | None = None
) -> LazyFrame | DataFrame:
    """
    Parse the given SQL query and execute it against the registered frame data.

    Parameters
    ----------
    query
        A valid string SQL query.
    eager
        Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
        If unset, the value of the init-time "eager" parameter will be used;
        an explicit True/False always takes precedence over the init-time value.
        Note that the query itself is always executed in lazy-mode; this
        parameter only impacts the type of the returned frame.

    Examples
    --------
    Declare frame data and register with a SQLContext:

    >>> df = pl.DataFrame(
    ...     data=[
    ...         ("The Godfather", 1972, 6_000_000, 134_821_952, 9.2),
    ...         ("The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0),
    ...         ("Schindler's List", 1993, 22_000_000, 96_067_179, 8.9),
    ...         ("Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9),
    ...         ("The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3),
    ...     ],
    ...     schema=["title", "release_year", "budget", "gross", "imdb_score"],
    ...     orient="row",
    ... )
    >>> ctx = pl.SQLContext(films=df)

    Execute a SQL query against the registered frame data:

    >>> ctx.execute(
    ...     '''
    ...     SELECT title, release_year, imdb_score
    ...     FROM films
    ...     WHERE release_year > 1990
    ...     ORDER BY imdb_score DESC
    ...     ''',
    ...     eager=True,
    ... )
    shape: (4, 3)
    ┌──────────────────────────┬──────────────┬────────────┐
    │ title                    ┆ release_year ┆ imdb_score │
    │ ---                      ┆ ---          ┆ ---        │
    │ str                      ┆ i64          ┆ f64        │
    ╞══════════════════════════╪══════════════╪════════════╡
    │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
    │ The Dark Knight          ┆ 2008         ┆ 9.0        │
    │ Schindler's List         ┆ 1993         ┆ 8.9        │
    │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
    └──────────────────────────┴──────────────┴────────────┘

    Execute a GROUP BY query:

    >>> ctx.execute(
    ...     '''
    ...     SELECT
    ...         MAX(release_year / 10) * 10 AS decade,
    ...         SUM(gross) AS total_gross,
    ...         COUNT(title) AS n_films,
    ...     FROM films
    ...     GROUP BY (release_year / 10) -- decade
    ...     ORDER BY total_gross DESC
    ...     ''',
    ...     eager=True,
    ... )
    shape: (3, 3)
    ┌────────┬─────────────┬─────────┐
    │ decade ┆ total_gross ┆ n_films │
    │ ---    ┆ ---         ┆ ---     │
    │ i64    ┆ i64         ┆ u32     │
    ╞════════╪═════════════╪═════════╡
    │ 2000   ┆ 533316061   ┆ 1       │
    │ 1990   ┆ 232338648   ┆ 3       │
    │ 1970   ┆ 134821952   ┆ 1       │
    └────────┴─────────────┴─────────┘
    """
    # Only fall back to the init-time setting when the caller did not choose.
    # (`eager or self._eager_execution` would silently ignore an explicit
    # `eager=False` on a context created with `eager=True`.)
    if eager is None:
        eager = self._eager_execution

    res = wrap_ldf(self._ctxt.execute(query))
    return res.collect() if eager else res
|
|
445
|
-
|
|
446
|
-
def register(self, name: str, frame: CompatibleFrameType | None) -> Self:
    """
    Register a single frame as a table, using the given name.

    Parameters
    ----------
    name
        Name of the table.
    frame
        eager/lazy frame to associate with this table name. If None, an empty
        `LazyFrame` is registered under that name.

    See Also
    --------
    register_globals
    register_many
    unregister

    Examples
    --------
    >>> df = pl.DataFrame({"hello": ["world"]})
    >>> ctx = pl.SQLContext()
    >>> ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect()
    shape: (1, 1)
    ┌───────┐
    │ hello │
    │ ---   │
    │ str   │
    ╞═══════╡
    │ world │
    └───────┘
    """
    if frame is None:
        lf = LazyFrame()
    else:
        lf = _ensure_lazyframe(frame)
    self._ctxt.register(name, lf._ldf)
    return self
|
|
480
|
-
|
|
481
|
-
def register_globals(
    self, n: int | None = None, *, all_compatible: bool = True
) -> Self:
    """
    Register all frames (lazy or eager) found in the current globals scope.

    Automatically maps variable names to table names.

    Parameters
    ----------
    n
        Register only the most recent "n" frames.
    all_compatible
        Control whether we *also* register pandas DataFrame, Series, and
        pyarrow Table and RecordBatch objects. If False, only Polars
        classes are registered with the SQL engine.

    See Also
    --------
    register
    register_many
    unregister

    Examples
    --------
    >>> df1 = pl.DataFrame({"a": [1, 2, 3], "b": ["x", None, "z"]})
    >>> df2 = pl.DataFrame({"a": [2, 3, 4], "c": ["t", "w", "v"]})

    Register frames directly from variables found in the current globals scope:

    >>> ctx = pl.SQLContext(register_globals=True)
    >>> ctx.tables()
    ['df1', 'df2']

    Query using the register variable/frame names

    >>> ctx.execute(
    ...     "SELECT a, b, c FROM df1 LEFT JOIN df2 USING (a) ORDER BY a DESC"
    ... ).collect()
    shape: (3, 3)
    ┌─────┬──────┬──────┐
    │ a   ┆ b    ┆ c    │
    │ --- ┆ ---  ┆ ---  │
    │ i64 ┆ str  ┆ str  │
    ╞═════╪══════╪══════╡
    │ 3   ┆ z    ┆ w    │
    │ 2   ┆ null ┆ t    │
    │ 1   ┆ x    ┆ null │
    └─────┴──────┴──────┘
    """
    # discovery and registration in one step; nothing is held in a local here
    return self.register_many(
        frames=_get_frame_locals(n_objects=n, all_compatible=all_compatible)
    )
|
|
533
|
-
|
|
534
|
-
def register_many(
    self,
    frames: Mapping[str, CompatibleFrameType | None] | None = None,
    **named_frames: CompatibleFrameType | None,
) -> Self:
    """
    Register multiple eager/lazy frames as tables, using the associated names.

    Parameters
    ----------
    frames
        A `{name:frame, ...}` mapping.
    **named_frames
        Named eager/lazy frames, provided as kwargs. On a name clash the
        keyword argument replaces the entry from `frames`.

    See Also
    --------
    register
    register_globals
    unregister

    Examples
    --------
    >>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": ["m", "n", "o"]})
    >>> lf2 = pl.LazyFrame({"a": [2, 3, 4], "c": ["p", "q", "r"]})
    >>> lf3 = pl.LazyFrame({"a": [3, 4, 5], "b": ["s", "t", "u"]})
    >>> lf4 = pl.LazyFrame({"a": [4, 5, 6], "c": ["v", "w", "x"]})

    Register multiple frames at once, either by passing in as a dict...

    >>> ctx = pl.SQLContext().register_many({"tbl1": lf1, "tbl2": lf2})
    >>> ctx.tables()
    ['tbl1', 'tbl2']

    ...or using keyword args:

    >>> ctx.register_many(tbl3=lf3, tbl4=lf4).tables()
    ['tbl1', 'tbl2', 'tbl3', 'tbl4']
    """
    # work on a private copy so the caller's mapping is never mutated
    pending = dict(frames) if frames else {}
    pending.update(named_frames)
    for entry in pending.items():
        self.register(*entry)
    return self
|
|
578
|
-
|
|
579
|
-
def unregister(self, names: str | Collection[str]) -> Self:
    """
    Unregister one or more eager/lazy frames by name.

    Parameters
    ----------
    names
        Names of the tables to unregister.

    See Also
    --------
    register
    register_globals
    register_many

    Notes
    -----
    You can also control table registration lifetime by using `SQLContext` as a
    context manager; this can often be more useful when such control is wanted:

    >>> df0 = pl.DataFrame({"colx": [0, 1, 2]})
    >>> df1 = pl.DataFrame({"colx": [1, 2, 3]})
    >>> df2 = pl.DataFrame({"colx": [2, 3, 4]})

    Frames registered in-scope are automatically unregistered on scope-exit. Note
    that frames registered on construction will persist through subsequent scopes.

    >>> # register one frame at construction time, and the other two in-scope
    >>> with pl.SQLContext(tbl0=df0) as ctx:
    ...     ctx.register_many(tbl1=df1, tbl2=df2).tables()
    ['tbl0', 'tbl1', 'tbl2']

    After scope exit, none of the tables registered in-scope remain:

    >>> ctx.tables()
    ['tbl0']

    Examples
    --------
    >>> df0 = pl.DataFrame({"ints": [9, 8, 7, 6, 5]})
    >>> lf1 = pl.LazyFrame({"text": ["a", "b", "c"]})
    >>> lf2 = pl.LazyFrame({"misc": ["testing1234"]})

    Register with a SQLContext object:

    >>> ctx = pl.SQLContext(test1=df0, test2=lf1, test3=lf2)
    >>> ctx.tables()
    ['test1', 'test2', 'test3']

    Unregister one or more of the tables:

    >>> ctx.unregister(["test1", "test3"]).tables()
    ['test2']
    >>> ctx.unregister("test2").tables()
    []
    """
    # a bare string is one table name, not a collection of characters
    targets = (names,) if isinstance(names, str) else names
    for table_name in targets:
        self._ctxt.unregister(table_name)
    return self
|
|
640
|
-
|
|
641
|
-
def tables(self) -> list[str]:
    """
    Return a list of the registered table names.

    Notes
    -----
    The :meth:`tables` method will return the same values as the
    "SHOW TABLES" SQL statement, but as a list instead of a frame.

    Executing as SQL:

    >>> frame_data = pl.DataFrame({"hello": ["world"]})
    >>> ctx = pl.SQLContext(hello_world=frame_data)
    >>> ctx.execute("SHOW TABLES", eager=True)
    shape: (1, 1)
    ┌─────────────┐
    │ name        │
    │ ---         │
    │ str         │
    ╞═════════════╡
    │ hello_world │
    └─────────────┘

    Calling the method:

    >>> ctx.tables()
    ['hello_world']

    Examples
    --------
    >>> df1 = pl.DataFrame({"hello": ["world"]})
    >>> df2 = pl.DataFrame({"foo": ["bar", "baz"]})
    >>> ctx = pl.SQLContext(hello_data=df1, foo_bar=df2)
    >>> ctx.tables()
    ['foo_bar', 'hello_data']
    """
    # copy into a fresh list, then order it in place
    table_names = list(self._ctxt.get_tables())
    table_names.sort()
    return table_names
|