polars-runtime-compat 1.34.0b2 (cp39-abi3-win_arm64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- _polars_runtime_compat/.gitkeep +0 -0
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- polars/__init__.py +528 -0
- polars/_cpu_check.py +265 -0
- polars/_dependencies.py +355 -0
- polars/_plr.py +99 -0
- polars/_plr.pyi +2496 -0
- polars/_reexport.py +23 -0
- polars/_typing.py +478 -0
- polars/_utils/__init__.py +37 -0
- polars/_utils/async_.py +102 -0
- polars/_utils/cache.py +176 -0
- polars/_utils/cloud.py +40 -0
- polars/_utils/constants.py +29 -0
- polars/_utils/construction/__init__.py +46 -0
- polars/_utils/construction/dataframe.py +1397 -0
- polars/_utils/construction/other.py +72 -0
- polars/_utils/construction/series.py +560 -0
- polars/_utils/construction/utils.py +118 -0
- polars/_utils/convert.py +224 -0
- polars/_utils/deprecation.py +406 -0
- polars/_utils/getitem.py +457 -0
- polars/_utils/logging.py +11 -0
- polars/_utils/nest_asyncio.py +264 -0
- polars/_utils/parquet.py +15 -0
- polars/_utils/parse/__init__.py +12 -0
- polars/_utils/parse/expr.py +242 -0
- polars/_utils/polars_version.py +19 -0
- polars/_utils/pycapsule.py +53 -0
- polars/_utils/scan.py +27 -0
- polars/_utils/serde.py +63 -0
- polars/_utils/slice.py +215 -0
- polars/_utils/udfs.py +1251 -0
- polars/_utils/unstable.py +63 -0
- polars/_utils/various.py +782 -0
- polars/_utils/wrap.py +25 -0
- polars/api.py +370 -0
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +19 -0
- polars/catalog/unity/client.py +733 -0
- polars/catalog/unity/models.py +152 -0
- polars/config.py +1571 -0
- polars/convert/__init__.py +25 -0
- polars/convert/general.py +1046 -0
- polars/convert/normalize.py +261 -0
- polars/dataframe/__init__.py +5 -0
- polars/dataframe/_html.py +186 -0
- polars/dataframe/frame.py +12582 -0
- polars/dataframe/group_by.py +1067 -0
- polars/dataframe/plotting.py +257 -0
- polars/datatype_expr/__init__.py +5 -0
- polars/datatype_expr/array.py +56 -0
- polars/datatype_expr/datatype_expr.py +304 -0
- polars/datatype_expr/list.py +18 -0
- polars/datatype_expr/struct.py +69 -0
- polars/datatypes/__init__.py +122 -0
- polars/datatypes/_parse.py +195 -0
- polars/datatypes/_utils.py +48 -0
- polars/datatypes/classes.py +1213 -0
- polars/datatypes/constants.py +11 -0
- polars/datatypes/constructor.py +172 -0
- polars/datatypes/convert.py +366 -0
- polars/datatypes/group.py +130 -0
- polars/exceptions.py +230 -0
- polars/expr/__init__.py +7 -0
- polars/expr/array.py +964 -0
- polars/expr/binary.py +346 -0
- polars/expr/categorical.py +306 -0
- polars/expr/datetime.py +2620 -0
- polars/expr/expr.py +11272 -0
- polars/expr/list.py +1408 -0
- polars/expr/meta.py +444 -0
- polars/expr/name.py +321 -0
- polars/expr/string.py +3045 -0
- polars/expr/struct.py +357 -0
- polars/expr/whenthen.py +185 -0
- polars/functions/__init__.py +193 -0
- polars/functions/aggregation/__init__.py +33 -0
- polars/functions/aggregation/horizontal.py +298 -0
- polars/functions/aggregation/vertical.py +341 -0
- polars/functions/as_datatype.py +848 -0
- polars/functions/business.py +138 -0
- polars/functions/col.py +384 -0
- polars/functions/datatype.py +121 -0
- polars/functions/eager.py +524 -0
- polars/functions/escape_regex.py +29 -0
- polars/functions/lazy.py +2751 -0
- polars/functions/len.py +68 -0
- polars/functions/lit.py +210 -0
- polars/functions/random.py +22 -0
- polars/functions/range/__init__.py +19 -0
- polars/functions/range/_utils.py +15 -0
- polars/functions/range/date_range.py +303 -0
- polars/functions/range/datetime_range.py +370 -0
- polars/functions/range/int_range.py +348 -0
- polars/functions/range/linear_space.py +311 -0
- polars/functions/range/time_range.py +287 -0
- polars/functions/repeat.py +301 -0
- polars/functions/whenthen.py +353 -0
- polars/interchange/__init__.py +10 -0
- polars/interchange/buffer.py +77 -0
- polars/interchange/column.py +190 -0
- polars/interchange/dataframe.py +230 -0
- polars/interchange/from_dataframe.py +328 -0
- polars/interchange/protocol.py +303 -0
- polars/interchange/utils.py +170 -0
- polars/io/__init__.py +64 -0
- polars/io/_utils.py +317 -0
- polars/io/avro.py +49 -0
- polars/io/clipboard.py +36 -0
- polars/io/cloud/__init__.py +17 -0
- polars/io/cloud/_utils.py +80 -0
- polars/io/cloud/credential_provider/__init__.py +17 -0
- polars/io/cloud/credential_provider/_builder.py +520 -0
- polars/io/cloud/credential_provider/_providers.py +618 -0
- polars/io/csv/__init__.py +9 -0
- polars/io/csv/_utils.py +38 -0
- polars/io/csv/batched_reader.py +142 -0
- polars/io/csv/functions.py +1495 -0
- polars/io/database/__init__.py +6 -0
- polars/io/database/_arrow_registry.py +70 -0
- polars/io/database/_cursor_proxies.py +147 -0
- polars/io/database/_executor.py +578 -0
- polars/io/database/_inference.py +314 -0
- polars/io/database/_utils.py +144 -0
- polars/io/database/functions.py +516 -0
- polars/io/delta.py +499 -0
- polars/io/iceberg/__init__.py +3 -0
- polars/io/iceberg/_utils.py +697 -0
- polars/io/iceberg/dataset.py +556 -0
- polars/io/iceberg/functions.py +151 -0
- polars/io/ipc/__init__.py +8 -0
- polars/io/ipc/functions.py +514 -0
- polars/io/json/__init__.py +3 -0
- polars/io/json/read.py +101 -0
- polars/io/ndjson.py +332 -0
- polars/io/parquet/__init__.py +17 -0
- polars/io/parquet/field_overwrites.py +140 -0
- polars/io/parquet/functions.py +722 -0
- polars/io/partition.py +491 -0
- polars/io/plugins.py +187 -0
- polars/io/pyarrow_dataset/__init__.py +5 -0
- polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
- polars/io/pyarrow_dataset/functions.py +79 -0
- polars/io/scan_options/__init__.py +5 -0
- polars/io/scan_options/_options.py +59 -0
- polars/io/scan_options/cast_options.py +126 -0
- polars/io/spreadsheet/__init__.py +6 -0
- polars/io/spreadsheet/_utils.py +52 -0
- polars/io/spreadsheet/_write_utils.py +647 -0
- polars/io/spreadsheet/functions.py +1323 -0
- polars/lazyframe/__init__.py +9 -0
- polars/lazyframe/engine_config.py +61 -0
- polars/lazyframe/frame.py +8564 -0
- polars/lazyframe/group_by.py +669 -0
- polars/lazyframe/in_process.py +42 -0
- polars/lazyframe/opt_flags.py +333 -0
- polars/meta/__init__.py +14 -0
- polars/meta/build.py +33 -0
- polars/meta/index_type.py +27 -0
- polars/meta/thread_pool.py +50 -0
- polars/meta/versions.py +120 -0
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +213 -0
- polars/ml/utilities.py +30 -0
- polars/plugins.py +155 -0
- polars/py.typed +0 -0
- polars/pyproject.toml +96 -0
- polars/schema.py +265 -0
- polars/selectors.py +3117 -0
- polars/series/__init__.py +5 -0
- polars/series/array.py +776 -0
- polars/series/binary.py +254 -0
- polars/series/categorical.py +246 -0
- polars/series/datetime.py +2275 -0
- polars/series/list.py +1087 -0
- polars/series/plotting.py +191 -0
- polars/series/series.py +9197 -0
- polars/series/string.py +2367 -0
- polars/series/struct.py +154 -0
- polars/series/utils.py +191 -0
- polars/sql/__init__.py +7 -0
- polars/sql/context.py +677 -0
- polars/sql/functions.py +139 -0
- polars/string_cache.py +185 -0
- polars/testing/__init__.py +13 -0
- polars/testing/asserts/__init__.py +9 -0
- polars/testing/asserts/frame.py +231 -0
- polars/testing/asserts/series.py +219 -0
- polars/testing/asserts/utils.py +12 -0
- polars/testing/parametric/__init__.py +33 -0
- polars/testing/parametric/profiles.py +107 -0
- polars/testing/parametric/strategies/__init__.py +22 -0
- polars/testing/parametric/strategies/_utils.py +14 -0
- polars/testing/parametric/strategies/core.py +615 -0
- polars/testing/parametric/strategies/data.py +452 -0
- polars/testing/parametric/strategies/dtype.py +436 -0
- polars/testing/parametric/strategies/legacy.py +169 -0
- polars/type_aliases.py +24 -0
- polars_runtime_compat-1.34.0b2.dist-info/METADATA +31 -0
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
- polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
- polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +1 -0
polars/io/database/functions.py
@@ -0,0 +1,516 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, Any, Literal, overload

from polars._dependencies import import_optional
from polars._utils.unstable import issue_unstable_warning
from polars._utils.various import qualified_type_name
from polars.datatypes import N_INFER_DEFAULT
from polars.io.database._cursor_proxies import ODBCCursorProxy
from polars.io.database._executor import ConnectionExecutor

if TYPE_CHECKING:
    from collections.abc import Iterator

    from sqlalchemy.sql.elements import TextClause
    from sqlalchemy.sql.expression import Selectable

    from polars import DataFrame
    from polars._typing import ConnectionOrCursor, DbReadEngine, SchemaDict


@overload
def read_database(
    query: str | TextClause | Selectable,
    connection: ConnectionOrCursor | str,
    *,
    iter_batches: Literal[False] = ...,
    batch_size: int | None = ...,
    schema_overrides: SchemaDict | None = ...,
    infer_schema_length: int | None = ...,
    execute_options: dict[str, Any] | None = ...,
) -> DataFrame: ...


@overload
def read_database(
    query: str | TextClause | Selectable,
    connection: ConnectionOrCursor | str,
    *,
    iter_batches: Literal[True],
    batch_size: int | None = ...,
    schema_overrides: SchemaDict | None = ...,
    infer_schema_length: int | None = ...,
    execute_options: dict[str, Any] | None = ...,
) -> Iterator[DataFrame]: ...


@overload
def read_database(
    query: str | TextClause | Selectable,
    connection: ConnectionOrCursor | str,
    *,
    iter_batches: bool,
    batch_size: int | None = ...,
    schema_overrides: SchemaDict | None = ...,
    infer_schema_length: int | None = ...,
    execute_options: dict[str, Any] | None = ...,
) -> DataFrame | Iterator[DataFrame]: ...


def read_database(
    query: str | TextClause | Selectable,
    connection: ConnectionOrCursor | str,
    *,
    iter_batches: bool = False,
    batch_size: int | None = None,
    schema_overrides: SchemaDict | None = None,
    infer_schema_length: int | None = N_INFER_DEFAULT,
    execute_options: dict[str, Any] | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read the results of a SQL query into a DataFrame, given a connection object.

    Parameters
    ----------
    query
        SQL query to execute (if using a SQLAlchemy connection object this can
        be a suitable "Selectable", otherwise it is expected to be a string).
    connection
        An instantiated connection (or cursor/client object) that the query can be
        executed against. Can also pass a valid ODBC connection string (identified as
        such if it contains the string "Driver={...}"), in which case the `arrow-odbc`
        package will be used to establish the connection and return Arrow-native data
        to Polars. Async driver connections are also supported, though this is currently
        considered unstable. If using SQLAlchemy, you can configure the connection's
        `execution_options` before passing to `read_database` to refine its behaviour
        (see the `iter_batches` parameter for an example where this can be useful).

        .. warning::
            Use of asynchronous connections is currently considered **unstable**, and
            unexpected issues may arise; if this happens, please report them.
    iter_batches
        Return an iterator of DataFrames, where each DataFrame represents a batch of
        data returned by the query; this can be useful for processing large resultsets
        in a more memory-efficient manner. If supported by the backend, this value is
        passed to the underlying query execution method (note that lower values will
        typically result in poor performance as they will cause many round-trips to
        the database). If the backend does not support changing the batch size then
        a single DataFrame is yielded from the iterator.

        .. note::
            If using SQLAlchemy, you may also want to pass `stream_results=True` to the
            connection's `execution_options` method when setting this parameter, which
            will establish a server-side cursor; without this option some drivers (such
            as "psycopg2") will still materialise the entire result set client-side
            before batching the result locally.
    batch_size
        Indicate the size of each batch when `iter_batches` is True (note that you can
        still set this when `iter_batches` is False, in which case the resulting
        DataFrame is constructed internally using batched return before being returned
        to you). Note that some backends (such as Snowflake) may support batch operation
        but not allow for an explicit size to be set; in this case you will still
        receive batches but their size is determined by the backend (in which case any
        value set here will be ignored).
    schema_overrides
        A dictionary mapping column names to dtypes, used to override the schema
        inferred from the query cursor or given by the incoming Arrow data (depending
        on driver/backend). This can be useful if the given types can be more precisely
        defined (for example, if you know that a given column can be declared as `u32`
        instead of `i64`).
    infer_schema_length
        The maximum number of rows to scan for schema inference. If set to `None`, the
        full data may be scanned *(this can be slow)*. This parameter only applies if
        the data is read as a sequence of rows and the `schema_overrides` parameter
        is not set for the given column; Arrow-aware drivers also ignore this value.
    execute_options
        These options will be passed through into the underlying query execution method
        as kwargs. In the case of connections made using an ODBC string (which use
        `arrow-odbc`) these options are passed to the `read_arrow_batches_from_odbc`
        method.

    Notes
    -----
    * This function supports a wide range of native database drivers (ranging from local
      databases such as SQLite to large cloud databases such as Snowflake), as well as
      generic libraries such as ADBC, SQLAlchemy and various flavours of ODBC. If the
      backend supports returning Arrow data directly then this facility will be used to
      efficiently instantiate the DataFrame; otherwise, the DataFrame is initialised
      from row-wise data.

    * Support for Arrow Flight SQL data is available via the `adbc-driver-flightsql`
      package; see https://arrow.apache.org/adbc/current/driver/flight_sql.html for
      more details about using this driver (notable databases implementing Flight SQL
      include Dremio and InfluxDB).

    * The `read_database_uri` function can be noticeably faster than `read_database`
      if you are using a SQLAlchemy or DBAPI2 connection, as `connectorx` and `adbc`
      optimise translation of the result set into Arrow format. Note that you can
      determine a connection's URI from a SQLAlchemy engine object by calling
      `conn.engine.url.render_as_string(hide_password=False)`.

    * If Polars has to create a cursor from your connection in order to execute the
      query then that cursor will be automatically closed when the query completes;
      however, Polars will *never* close any other open connection or cursor.

    * Polars is able to support more than just relational databases and SQL queries
      through this function. For example, you can load local graph database results
      from a `KùzuDB` connection in conjunction with a Cypher query, or use SurrealQL
      with SurrealDB.

    See Also
    --------
    read_database_uri : Create a DataFrame from a SQL query using a URI string.

    Examples
    --------
    Instantiate a DataFrame from a SQL query against a user-supplied connection:

    >>> df = pl.read_database(
    ...     query="SELECT * FROM test_data",
    ...     connection=user_conn,
    ...     schema_overrides={"normalised_score": pl.UInt8},
    ... )  # doctest: +SKIP

    Use a parameterised SQLAlchemy query, passing named values via `execute_options`:

    >>> df = pl.read_database(
    ...     query="SELECT * FROM test_data WHERE metric > :value",
    ...     connection=alchemy_conn,
    ...     execute_options={"parameters": {"value": 0}},
    ... )  # doctest: +SKIP

    Use 'qmark' style parameterisation; values are still passed via `execute_options`,
    but in this case the "parameters" value is a sequence of literals, not a dict:

    >>> df = pl.read_database(
    ...     query="SELECT * FROM test_data WHERE metric > ?",
    ...     connection=alchemy_conn,
    ...     execute_options={"parameters": [0]},
    ... )  # doctest: +SKIP

    Batch the results of a large SQLAlchemy query into DataFrames, each containing
    100,000 rows; explicitly establish a server-side cursor using the connection's
    "execution_options" method to avoid loading the entire result locally before
    batching (this is not required for all drivers, so check your driver's
    documentation for more details):

    >>> for df in pl.read_database(
    ...     query="SELECT * FROM test_data",
    ...     connection=alchemy_conn.execution_options(stream_results=True),
    ...     iter_batches=True,
    ...     batch_size=100_000,
    ... ):
    ...     do_something(df)  # doctest: +SKIP

    Instantiate a DataFrame using an ODBC connection string (requires the `arrow-odbc`
    package) setting upper limits on the buffer size of variadic text/binary columns:

    >>> df = pl.read_database(
    ...     query="SELECT * FROM test_data",
    ...     connection="Driver={PostgreSQL};Server=localhost;Port=5432;Database=test;Uid=usr;Pwd=",
    ...     execute_options={"max_text_size": 512, "max_binary_size": 1024},
    ... )  # doctest: +SKIP

    Load graph database results from a `KùzuDB` connection and a Cypher query:

    >>> df = pl.read_database(
    ...     query="MATCH (a:User)-[f:Follows]->(b:User) RETURN a.name, f.since, b.name",
    ...     connection=kuzu_db_conn,
    ... )  # doctest: +SKIP

    Load data from an asynchronous SQLAlchemy driver/engine; note that asynchronous
    connections and sessions are also supported here:

    >>> from sqlalchemy.ext.asyncio import create_async_engine
    >>> async_engine = create_async_engine("sqlite+aiosqlite:///test.db")
    >>> df = pl.read_database(
    ...     query="SELECT * FROM test_data",
    ...     connection=async_engine,
    ... )  # doctest: +SKIP

    Load data from an `AsyncSurrealDB` client connection object; note that both the "ws"
    and "http" protocols are supported, as is the synchronous `SurrealDB` client. The
    async loop can be run with standard `asyncio` or with `uvloop`:

    >>> import asyncio  # (or uvloop)
    >>> async def surreal_query_to_frame(query: str, url: str):
    ...     async with AsyncSurrealDB(url) as client:
    ...         await client.use(namespace="test", database="test")
    ...         return pl.read_database(query=query, connection=client)
    >>> df = asyncio.run(
    ...     surreal_query_to_frame(
    ...         query="SELECT * FROM test",
    ...         url="http://localhost:8000",
    ...     )
    ... )  # doctest: +SKIP

    """  # noqa: W505
    if isinstance(connection, str):
        # check for odbc connection string
        if re.search(r"\bdriver\s*=\s*{[^}]+?}", connection, re.IGNORECASE):
            _ = import_optional(
                module_name="arrow_odbc",
                err_prefix="use of ODBC connection string requires the",
                err_suffix="package",
            )
            connection = ODBCCursorProxy(connection)
        elif "://" in connection:
            # otherwise looks like a mistaken call to read_database_uri
            msg = "string URI is invalid here; call `read_database_uri` instead"
            raise ValueError(msg)
        else:
            msg = "unable to identify string connection as valid ODBC (no driver)"
            raise ValueError(msg)

    # return frame from arbitrary connections using the executor abstraction
    with ConnectionExecutor(connection) as cx:
        return cx.execute(
            query=query,
            options=execute_options,
        ).to_polars(
            batch_size=batch_size,
            iter_batches=iter_batches,
            schema_overrides=schema_overrides,
            infer_schema_length=infer_schema_length,
        )


@overload
def read_database_uri(
    query: str,
    uri: str,
    *,
    partition_on: str | None = None,
    partition_range: tuple[int, int] | None = None,
    partition_num: int | None = None,
    protocol: str | None = None,
    engine: Literal["adbc"],
    schema_overrides: SchemaDict | None = None,
    execute_options: dict[str, Any] | None = None,
    pre_execution_query: str | list[str] | None = None,
) -> DataFrame: ...


@overload
def read_database_uri(
    query: list[str] | str,
    uri: str,
    *,
    partition_on: str | None = None,
    partition_range: tuple[int, int] | None = None,
    partition_num: int | None = None,
    protocol: str | None = None,
    engine: Literal["connectorx"] | None = None,
    schema_overrides: SchemaDict | None = None,
    execute_options: None = None,
    pre_execution_query: str | list[str] | None = None,
) -> DataFrame: ...


@overload
def read_database_uri(
    query: str,
    uri: str,
    *,
    partition_on: str | None = None,
    partition_range: tuple[int, int] | None = None,
    partition_num: int | None = None,
    protocol: str | None = None,
    engine: DbReadEngine | None = None,
    schema_overrides: None = None,
    execute_options: dict[str, Any] | None = None,
    pre_execution_query: str | list[str] | None = None,
) -> DataFrame: ...


def read_database_uri(
    query: list[str] | str,
    uri: str,
    *,
    partition_on: str | None = None,
    partition_range: tuple[int, int] | None = None,
    partition_num: int | None = None,
    protocol: str | None = None,
    engine: DbReadEngine | None = None,
    schema_overrides: SchemaDict | None = None,
    execute_options: dict[str, Any] | None = None,
    pre_execution_query: str | list[str] | None = None,
) -> DataFrame:
    """
    Read the results of a SQL query into a DataFrame, given a URI.

    Parameters
    ----------
    query
        Raw SQL query (or queries).
    uri
        A connectorx or ADBC connection URI string that starts with the backend's
        driver name, for example:

        * "postgresql://user:pass@server:port/database"
        * "snowflake://user:pass@account/database/schema?warehouse=warehouse&role=role"

        The caller is responsible for escaping any special characters in the string,
        which will be passed "as-is" to the underlying engine (this is most often
        required when coming across special characters in the password).
    partition_on
        The column on which to partition the result (connectorx).
    partition_range
        The value range of the partition column (connectorx).
    partition_num
        How many partitions to generate (connectorx).
    protocol
        Backend-specific transfer protocol directive (connectorx); see connectorx
        documentation for more details.
    engine : {'connectorx', 'adbc'}
        Selects the engine used for reading the database (defaulting to connectorx):

        * `'connectorx'`
          Supports a range of databases, such as PostgreSQL, Redshift, MySQL, MariaDB,
          Clickhouse, Oracle, BigQuery, SQL Server, and so on. For an up-to-date list
          please see the connectorx docs:
          https://github.com/sfu-db/connector-x#supported-sources--destinations
        * `'adbc'`
          Currently there is limited support for this engine, with a relatively small
          number of drivers available, most of which are still in development. For
          an up-to-date list of drivers please see the ADBC docs:
          https://arrow.apache.org/adbc/
    schema_overrides
        A dictionary mapping column names to dtypes, used to override the schema
        given in the data returned by the query.
    execute_options
        These options will be passed to the underlying query execution method as
        kwargs. Note that connectorx does not support this parameter and ADBC currently
        only supports positional 'qmark' style parameterization.
    pre_execution_query
        SQL query or list of SQL queries executed before the main query (connectorx>=0.4.2).
        Can be used to set runtime configurations using SET statements.
        Only applicable for Postgres and MySQL sources.
        Only applicable with the connectorx engine.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.

    Notes
    -----
    For `connectorx`, ensure that you have `connectorx>=0.3.2`. The documentation
    is available `here <https://sfu-db.github.io/connector-x/intro.html>`_.

    For `adbc` you will need to have installed `pyarrow` and the ADBC driver associated
    with the backend you are connecting to, e.g. `adbc-driver-postgresql`.

    If your password contains special characters, you will need to escape them.
    This will usually require the use of a URL-escaping function, for example:

    >>> from urllib.parse import quote, quote_plus
    >>> quote_plus("pass word?")
    'pass+word%3F'
    >>> quote("pass word?")
    'pass%20word%3F'

    See Also
    --------
    read_database : Create a DataFrame from a SQL query using a connection object.

    Examples
    --------
    Create a DataFrame from a SQL query using a single thread:

    >>> uri = "postgresql://username:password@server:port/database"
    >>> query = "SELECT * FROM lineitem"
    >>> pl.read_database_uri(query, uri)  # doctest: +SKIP

    Create a DataFrame in parallel using 10 threads by automatically partitioning
    the provided SQL on the partition column:

    >>> uri = "postgresql://username:password@server:port/database"
    >>> query = "SELECT * FROM lineitem"
    >>> pl.read_database_uri(
    ...     query,
    ...     uri,
    ...     partition_on="partition_col",
    ...     partition_num=10,
    ...     engine="connectorx",
    ... )  # doctest: +SKIP

    Create a DataFrame in parallel using 2 threads by explicitly providing two
    SQL queries:

    >>> uri = "postgresql://username:password@server:port/database"
    >>> queries = [
    ...     "SELECT * FROM lineitem WHERE partition_col <= 10",
    ...     "SELECT * FROM lineitem WHERE partition_col > 10",
    ... ]
    >>> pl.read_database_uri(queries, uri, engine="connectorx")  # doctest: +SKIP

    Read data from Snowflake using the ADBC driver:

    >>> df = pl.read_database_uri(
    ...     "SELECT * FROM test_table",
    ...     "snowflake://user:pass@company-org/testdb/public?warehouse=test&role=myrole",
    ...     engine="adbc",
    ... )  # doctest: +SKIP

    Pass a single parameter via `execute_options` into a query using the ADBC driver:

    >>> df = pl.read_database_uri(
    ...     "SELECT * FROM employees WHERE hourly_rate > ?",
    ...     "sqlite:///:memory:",
    ...     engine="adbc",
    ...     execute_options={"parameters": (30,)},
    ... )  # doctest: +SKIP

    Or pass multiple parameters:

    >>> df = pl.read_database_uri(
    ...     "SELECT * FROM employees WHERE hourly_rate BETWEEN ? AND ?",
    ...     "sqlite:///:memory:",
    ...     engine="adbc",
    ...     execute_options={"parameters": (40, 20)},
    ... )  # doctest: +SKIP
    """
    from polars.io.database._utils import _read_sql_adbc, _read_sql_connectorx

    if not isinstance(uri, str):
        msg = f"expected connection to be a URI string; found {qualified_type_name(uri)!r}"
        raise TypeError(msg)
    elif engine is None:
        engine = "connectorx"

    if engine == "connectorx":
        if execute_options:
            msg = "the 'connectorx' engine does not support use of `execute_options`"
            raise ValueError(msg)
        if pre_execution_query:
            issue_unstable_warning(
                "the 'pre_execution_query' parameter is considered unstable."
            )
        return _read_sql_connectorx(
            query,
            connection_uri=uri,
            partition_on=partition_on,
            partition_range=partition_range,
            partition_num=partition_num,
            protocol=protocol,
            schema_overrides=schema_overrides,
            pre_execution_query=pre_execution_query,
        )
    elif engine == "adbc":
        if not isinstance(query, str):
            msg = "only a single SQL query string is accepted for adbc"
            raise ValueError(msg)
        if pre_execution_query:
            msg = "the 'adbc' engine does not support use of `pre_execution_query`"
            raise ValueError(msg)
        return _read_sql_adbc(
            query,
            connection_uri=uri,
            schema_overrides=schema_overrides,
            execute_options=execute_options,
        )
    else:
        msg = f"engine must be one of {{'connectorx', 'adbc'}}, got {engine!r}"
        raise ValueError(msg)