polars-runtime-compat 1.34.0b3-cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b5-cp39-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of polars-runtime-compat might be problematic.

Files changed (204)
  1. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  2. polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
  3. polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
  4. polars/__init__.py +0 -528
  5. polars/_cpu_check.py +0 -265
  6. polars/_dependencies.py +0 -355
  7. polars/_plr.py +0 -99
  8. polars/_plr.pyi +0 -2496
  9. polars/_reexport.py +0 -23
  10. polars/_typing.py +0 -478
  11. polars/_utils/__init__.py +0 -37
  12. polars/_utils/async_.py +0 -102
  13. polars/_utils/cache.py +0 -176
  14. polars/_utils/cloud.py +0 -40
  15. polars/_utils/constants.py +0 -29
  16. polars/_utils/construction/__init__.py +0 -46
  17. polars/_utils/construction/dataframe.py +0 -1397
  18. polars/_utils/construction/other.py +0 -72
  19. polars/_utils/construction/series.py +0 -560
  20. polars/_utils/construction/utils.py +0 -118
  21. polars/_utils/convert.py +0 -224
  22. polars/_utils/deprecation.py +0 -406
  23. polars/_utils/getitem.py +0 -457
  24. polars/_utils/logging.py +0 -11
  25. polars/_utils/nest_asyncio.py +0 -264
  26. polars/_utils/parquet.py +0 -15
  27. polars/_utils/parse/__init__.py +0 -12
  28. polars/_utils/parse/expr.py +0 -242
  29. polars/_utils/polars_version.py +0 -19
  30. polars/_utils/pycapsule.py +0 -53
  31. polars/_utils/scan.py +0 -27
  32. polars/_utils/serde.py +0 -63
  33. polars/_utils/slice.py +0 -215
  34. polars/_utils/udfs.py +0 -1251
  35. polars/_utils/unstable.py +0 -63
  36. polars/_utils/various.py +0 -782
  37. polars/_utils/wrap.py +0 -25
  38. polars/api.py +0 -370
  39. polars/catalog/__init__.py +0 -0
  40. polars/catalog/unity/__init__.py +0 -19
  41. polars/catalog/unity/client.py +0 -733
  42. polars/catalog/unity/models.py +0 -152
  43. polars/config.py +0 -1571
  44. polars/convert/__init__.py +0 -25
  45. polars/convert/general.py +0 -1046
  46. polars/convert/normalize.py +0 -261
  47. polars/dataframe/__init__.py +0 -5
  48. polars/dataframe/_html.py +0 -186
  49. polars/dataframe/frame.py +0 -12582
  50. polars/dataframe/group_by.py +0 -1067
  51. polars/dataframe/plotting.py +0 -257
  52. polars/datatype_expr/__init__.py +0 -5
  53. polars/datatype_expr/array.py +0 -56
  54. polars/datatype_expr/datatype_expr.py +0 -304
  55. polars/datatype_expr/list.py +0 -18
  56. polars/datatype_expr/struct.py +0 -69
  57. polars/datatypes/__init__.py +0 -122
  58. polars/datatypes/_parse.py +0 -195
  59. polars/datatypes/_utils.py +0 -48
  60. polars/datatypes/classes.py +0 -1213
  61. polars/datatypes/constants.py +0 -11
  62. polars/datatypes/constructor.py +0 -172
  63. polars/datatypes/convert.py +0 -366
  64. polars/datatypes/group.py +0 -130
  65. polars/exceptions.py +0 -230
  66. polars/expr/__init__.py +0 -7
  67. polars/expr/array.py +0 -964
  68. polars/expr/binary.py +0 -346
  69. polars/expr/categorical.py +0 -306
  70. polars/expr/datetime.py +0 -2620
  71. polars/expr/expr.py +0 -11272
  72. polars/expr/list.py +0 -1408
  73. polars/expr/meta.py +0 -444
  74. polars/expr/name.py +0 -321
  75. polars/expr/string.py +0 -3045
  76. polars/expr/struct.py +0 -357
  77. polars/expr/whenthen.py +0 -185
  78. polars/functions/__init__.py +0 -193
  79. polars/functions/aggregation/__init__.py +0 -33
  80. polars/functions/aggregation/horizontal.py +0 -298
  81. polars/functions/aggregation/vertical.py +0 -341
  82. polars/functions/as_datatype.py +0 -848
  83. polars/functions/business.py +0 -138
  84. polars/functions/col.py +0 -384
  85. polars/functions/datatype.py +0 -121
  86. polars/functions/eager.py +0 -524
  87. polars/functions/escape_regex.py +0 -29
  88. polars/functions/lazy.py +0 -2751
  89. polars/functions/len.py +0 -68
  90. polars/functions/lit.py +0 -210
  91. polars/functions/random.py +0 -22
  92. polars/functions/range/__init__.py +0 -19
  93. polars/functions/range/_utils.py +0 -15
  94. polars/functions/range/date_range.py +0 -303
  95. polars/functions/range/datetime_range.py +0 -370
  96. polars/functions/range/int_range.py +0 -348
  97. polars/functions/range/linear_space.py +0 -311
  98. polars/functions/range/time_range.py +0 -287
  99. polars/functions/repeat.py +0 -301
  100. polars/functions/whenthen.py +0 -353
  101. polars/interchange/__init__.py +0 -10
  102. polars/interchange/buffer.py +0 -77
  103. polars/interchange/column.py +0 -190
  104. polars/interchange/dataframe.py +0 -230
  105. polars/interchange/from_dataframe.py +0 -328
  106. polars/interchange/protocol.py +0 -303
  107. polars/interchange/utils.py +0 -170
  108. polars/io/__init__.py +0 -64
  109. polars/io/_utils.py +0 -317
  110. polars/io/avro.py +0 -49
  111. polars/io/clipboard.py +0 -36
  112. polars/io/cloud/__init__.py +0 -17
  113. polars/io/cloud/_utils.py +0 -80
  114. polars/io/cloud/credential_provider/__init__.py +0 -17
  115. polars/io/cloud/credential_provider/_builder.py +0 -520
  116. polars/io/cloud/credential_provider/_providers.py +0 -618
  117. polars/io/csv/__init__.py +0 -9
  118. polars/io/csv/_utils.py +0 -38
  119. polars/io/csv/batched_reader.py +0 -142
  120. polars/io/csv/functions.py +0 -1495
  121. polars/io/database/__init__.py +0 -6
  122. polars/io/database/_arrow_registry.py +0 -70
  123. polars/io/database/_cursor_proxies.py +0 -147
  124. polars/io/database/_executor.py +0 -578
  125. polars/io/database/_inference.py +0 -314
  126. polars/io/database/_utils.py +0 -144
  127. polars/io/database/functions.py +0 -516
  128. polars/io/delta.py +0 -499
  129. polars/io/iceberg/__init__.py +0 -3
  130. polars/io/iceberg/_utils.py +0 -697
  131. polars/io/iceberg/dataset.py +0 -556
  132. polars/io/iceberg/functions.py +0 -151
  133. polars/io/ipc/__init__.py +0 -8
  134. polars/io/ipc/functions.py +0 -514
  135. polars/io/json/__init__.py +0 -3
  136. polars/io/json/read.py +0 -101
  137. polars/io/ndjson.py +0 -332
  138. polars/io/parquet/__init__.py +0 -17
  139. polars/io/parquet/field_overwrites.py +0 -140
  140. polars/io/parquet/functions.py +0 -722
  141. polars/io/partition.py +0 -491
  142. polars/io/plugins.py +0 -187
  143. polars/io/pyarrow_dataset/__init__.py +0 -5
  144. polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
  145. polars/io/pyarrow_dataset/functions.py +0 -79
  146. polars/io/scan_options/__init__.py +0 -5
  147. polars/io/scan_options/_options.py +0 -59
  148. polars/io/scan_options/cast_options.py +0 -126
  149. polars/io/spreadsheet/__init__.py +0 -6
  150. polars/io/spreadsheet/_utils.py +0 -52
  151. polars/io/spreadsheet/_write_utils.py +0 -647
  152. polars/io/spreadsheet/functions.py +0 -1323
  153. polars/lazyframe/__init__.py +0 -9
  154. polars/lazyframe/engine_config.py +0 -61
  155. polars/lazyframe/frame.py +0 -8564
  156. polars/lazyframe/group_by.py +0 -669
  157. polars/lazyframe/in_process.py +0 -42
  158. polars/lazyframe/opt_flags.py +0 -333
  159. polars/meta/__init__.py +0 -14
  160. polars/meta/build.py +0 -33
  161. polars/meta/index_type.py +0 -27
  162. polars/meta/thread_pool.py +0 -50
  163. polars/meta/versions.py +0 -120
  164. polars/ml/__init__.py +0 -0
  165. polars/ml/torch.py +0 -213
  166. polars/ml/utilities.py +0 -30
  167. polars/plugins.py +0 -155
  168. polars/py.typed +0 -0
  169. polars/pyproject.toml +0 -103
  170. polars/schema.py +0 -265
  171. polars/selectors.py +0 -3117
  172. polars/series/__init__.py +0 -5
  173. polars/series/array.py +0 -776
  174. polars/series/binary.py +0 -254
  175. polars/series/categorical.py +0 -246
  176. polars/series/datetime.py +0 -2275
  177. polars/series/list.py +0 -1087
  178. polars/series/plotting.py +0 -191
  179. polars/series/series.py +0 -9197
  180. polars/series/string.py +0 -2367
  181. polars/series/struct.py +0 -154
  182. polars/series/utils.py +0 -191
  183. polars/sql/__init__.py +0 -7
  184. polars/sql/context.py +0 -677
  185. polars/sql/functions.py +0 -139
  186. polars/string_cache.py +0 -185
  187. polars/testing/__init__.py +0 -13
  188. polars/testing/asserts/__init__.py +0 -9
  189. polars/testing/asserts/frame.py +0 -231
  190. polars/testing/asserts/series.py +0 -219
  191. polars/testing/asserts/utils.py +0 -12
  192. polars/testing/parametric/__init__.py +0 -33
  193. polars/testing/parametric/profiles.py +0 -107
  194. polars/testing/parametric/strategies/__init__.py +0 -22
  195. polars/testing/parametric/strategies/_utils.py +0 -14
  196. polars/testing/parametric/strategies/core.py +0 -615
  197. polars/testing/parametric/strategies/data.py +0 -452
  198. polars/testing/parametric/strategies/dtype.py +0 -436
  199. polars/testing/parametric/strategies/legacy.py +0 -169
  200. polars/type_aliases.py +0 -24
  201. polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
  202. polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
  203. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
  204. {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
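Since wheels are ordinary zip archives, the file-level summary above can be reproduced locally by comparing the member lists of the two archives. A minimal sketch, assuming both wheels have been downloaded to the working directory (the local filenames are assumptions; the added/removed paths printed correspond to the entries listed above). The single hunk shown below covers polars/io/database/functions.py, which was removed in full:

# Sketch: diff the member lists of two wheel archives (wheels are zip files).
# The local filenames below are assumptions; adjust to where the wheels live.
import zipfile

OLD = "polars_runtime_compat-1.34.0b3-cp39-abi3-macosx_11_0_arm64.whl"
NEW = "polars_runtime_compat-1.34.0b5-cp39-abi3-macosx_11_0_arm64.whl"

with zipfile.ZipFile(OLD) as a, zipfile.ZipFile(NEW) as b:
    old_files, new_files = set(a.namelist()), set(b.namelist())

for path in sorted(old_files - new_files):
    print("removed:", path)
for path in sorted(new_files - old_files):
    print("added:  ", path)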
polars/io/database/functions.py
@@ -1,516 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import TYPE_CHECKING, Any, Literal, overload
-
-from polars._dependencies import import_optional
-from polars._utils.unstable import issue_unstable_warning
-from polars._utils.various import qualified_type_name
-from polars.datatypes import N_INFER_DEFAULT
-from polars.io.database._cursor_proxies import ODBCCursorProxy
-from polars.io.database._executor import ConnectionExecutor
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-
-    from sqlalchemy.sql.elements import TextClause
-    from sqlalchemy.sql.expression import Selectable
-
-    from polars import DataFrame
-    from polars._typing import ConnectionOrCursor, DbReadEngine, SchemaDict
-
-
-@overload
-def read_database(
-    query: str | TextClause | Selectable,
-    connection: ConnectionOrCursor | str,
-    *,
-    iter_batches: Literal[False] = ...,
-    batch_size: int | None = ...,
-    schema_overrides: SchemaDict | None = ...,
-    infer_schema_length: int | None = ...,
-    execute_options: dict[str, Any] | None = ...,
-) -> DataFrame: ...
-
-
-@overload
-def read_database(
-    query: str | TextClause | Selectable,
-    connection: ConnectionOrCursor | str,
-    *,
-    iter_batches: Literal[True],
-    batch_size: int | None = ...,
-    schema_overrides: SchemaDict | None = ...,
-    infer_schema_length: int | None = ...,
-    execute_options: dict[str, Any] | None = ...,
-) -> Iterator[DataFrame]: ...
-
-
-@overload
-def read_database(
-    query: str | TextClause | Selectable,
-    connection: ConnectionOrCursor | str,
-    *,
-    iter_batches: bool,
-    batch_size: int | None = ...,
-    schema_overrides: SchemaDict | None = ...,
-    infer_schema_length: int | None = ...,
-    execute_options: dict[str, Any] | None = ...,
-) -> DataFrame | Iterator[DataFrame]: ...
-
-
-def read_database(
-    query: str | TextClause | Selectable,
-    connection: ConnectionOrCursor | str,
-    *,
-    iter_batches: bool = False,
-    batch_size: int | None = None,
-    schema_overrides: SchemaDict | None = None,
-    infer_schema_length: int | None = N_INFER_DEFAULT,
-    execute_options: dict[str, Any] | None = None,
-) -> DataFrame | Iterator[DataFrame]:
-    """
-    Read the results of a SQL query into a DataFrame, given a connection object.
-
-    Parameters
-    ----------
-    query
-        SQL query to execute (if using a SQLAlchemy connection object this can
-        be a suitable "Selectable", otherwise it is expected to be a string).
-    connection
-        An instantiated connection (or cursor/client object) that the query can be
-        executed against. Can also pass a valid ODBC connection string (identified as
-        such if it contains the string "Driver={...}"), in which case the `arrow-odbc`
-        package will be used to establish the connection and return Arrow-native data
-        to Polars. Async driver connections are also supported, though this is currently
-        considered unstable. If using SQLAlchemy, you can configure the connection's
-        `execution_options` before passing to `read_database` to refine its behaviour
-        (see the `iter_batches` parameter for an example where this can be useful).
-
-        .. warning::
-            Use of asynchronous connections is currently considered **unstable**, and
-            unexpected issues may arise; if this happens, please report them.
-    iter_batches
-        Return an iterator of DataFrames, where each DataFrame represents a batch of
-        data returned by the query; this can be useful for processing large resultsets
-        in a more memory-efficient manner. If supported by the backend, this value is
-        passed to the underlying query execution method (note that lower values will
-        typically result in poor performance as they will cause many round-trips to
-        the database). If the backend does not support changing the batch size then
-        a single DataFrame is yielded from the iterator.
-
-        .. note::
-            If using SQLAlchemy, you may also want to pass `stream_results=True` to the
-            connection's `execution_options` method when setting this parameter, which
-            will establish a server-side cursor; without this option some drivers (such
-            as "psycopg2") will still materialise the entire result set client-side
-            before batching the result locally.
-    batch_size
-        Indicate the size of each batch when `iter_batches` is True (note that you can
-        still set this when `iter_batches` is False, in which case the resulting
-        DataFrame is constructed internally using batched return before being returned
-        to you). Note that some backends (such as Snowflake) may support batch operation
-        but not allow for an explicit size to be set; in this case you will still
-        receive batches but their size is determined by the backend (in which case any
-        value set here will be ignored).
-    schema_overrides
-        A dictionary mapping column names to dtypes, used to override the schema
-        inferred from the query cursor or given by the incoming Arrow data (depending
-        on driver/backend). This can be useful if the given types can be more precisely
-        defined (for example, if you know that a given column can be declared as `u32`
-        instead of `i64`).
-    infer_schema_length
-        The maximum number of rows to scan for schema inference. If set to `None`, the
-        full data may be scanned *(this can be slow)*. This parameter only applies if
-        the data is read as a sequence of rows and the `schema_overrides` parameter
-        is not set for the given column; Arrow-aware drivers also ignore this value.
-    execute_options
-        These options will be passed through into the underlying query execution method
-        as kwargs. In the case of connections made using an ODBC string (which use
-        `arrow-odbc`) these options are passed to the `read_arrow_batches_from_odbc`
-        method.
-
-    Notes
-    -----
-    * This function supports a wide range of native database drivers (ranging from local
-      databases such as SQLite to large cloud databases such as Snowflake), as well as
-      generic libraries such as ADBC, SQLAlchemy and various flavours of ODBC. If the
-      backend supports returning Arrow data directly then this facility will be used to
-      efficiently instantiate the DataFrame; otherwise, the DataFrame is initialised
-      from row-wise data.
-
-    * Support for Arrow Flight SQL data is available via the `adbc-driver-flightsql`
-      package; see https://arrow.apache.org/adbc/current/driver/flight_sql.html for
-      more details about using this driver (notable databases implementing Flight SQL
-      include Dremio and InfluxDB).
-
-    * The `read_database_uri` function can be noticeably faster than `read_database`
-      if you are using a SQLAlchemy or DBAPI2 connection, as `connectorx` and `adbc`
-      optimise translation of the result set into Arrow format. Note that you can
-      determine a connection's URI from a SQLAlchemy engine object by calling
-      `conn.engine.url.render_as_string(hide_password=False)`.
-
-    * If Polars has to create a cursor from your connection in order to execute the
-      query then that cursor will be automatically closed when the query completes;
-      however, Polars will *never* close any other open connection or cursor.
-
-    * Polars is able to support more than just relational databases and SQL queries
-      through this function. For example, you can load local graph database results
-      from a `KùzuDB` connection in conjunction with a Cypher query, or use SurrealQL
-      with SurrealDB.
-
-    See Also
-    --------
-    read_database_uri : Create a DataFrame from a SQL query using a URI string.
-
-    Examples
-    --------
-    Instantiate a DataFrame from a SQL query against a user-supplied connection:
-
-    >>> df = pl.read_database(
-    ...     query="SELECT * FROM test_data",
-    ...     connection=user_conn,
-    ...     schema_overrides={"normalised_score": pl.UInt8},
-    ... )  # doctest: +SKIP
-
-    Use a parameterised SQLAlchemy query, passing named values via `execute_options`:
-
-    >>> df = pl.read_database(
-    ...     query="SELECT * FROM test_data WHERE metric > :value",
-    ...     connection=alchemy_conn,
-    ...     execute_options={"parameters": {"value": 0}},
-    ... )  # doctest: +SKIP
-
-    Use 'qmark' style parameterisation; values are still passed via `execute_options`,
-    but in this case the "parameters" value is a sequence of literals, not a dict:
-
-    >>> df = pl.read_database(
-    ...     query="SELECT * FROM test_data WHERE metric > ?",
-    ...     connection=alchemy_conn,
-    ...     execute_options={"parameters": [0]},
-    ... )  # doctest: +SKIP
-
-    Batch the results of a large SQLAlchemy query into DataFrames, each containing
-    100,000 rows; explicitly establish a server-side cursor using the connection's
-    "execution_options" method to avoid loading the entire result locally before
-    batching (this is not required for all drivers, so check your driver's
-    documentation for more details):
-
-    >>> for df in pl.read_database(
-    ...     query="SELECT * FROM test_data",
-    ...     connection=alchemy_conn.execution_options(stream_results=True),
-    ...     iter_batches=True,
-    ...     batch_size=100_000,
-    ... ):
-    ...     do_something(df)  # doctest: +SKIP
-
-    Instantiate a DataFrame using an ODBC connection string (requires the `arrow-odbc`
-    package) setting upper limits on the buffer size of variadic text/binary columns:
-
-    >>> df = pl.read_database(
-    ...     query="SELECT * FROM test_data",
-    ...     connection="Driver={PostgreSQL};Server=localhost;Port=5432;Database=test;Uid=usr;Pwd=",
-    ...     execute_options={"max_text_size": 512, "max_binary_size": 1024},
-    ... )  # doctest: +SKIP
-
-    Load graph database results from a `KùzuDB` connection and a Cypher query:
-
-    >>> df = pl.read_database(
-    ...     query="MATCH (a:User)-[f:Follows]->(b:User) RETURN a.name, f.since, b.name",
-    ...     connection=kuzu_db_conn,
-    ... )  # doctest: +SKIP
-
-    Load data from an asynchronous SQLAlchemy driver/engine; note that asynchronous
-    connections and sessions are also supported here:
-
-    >>> from sqlalchemy.ext.asyncio import create_async_engine
-    >>> async_engine = create_async_engine("sqlite+aiosqlite:///test.db")
-    >>> df = pl.read_database(
-    ...     query="SELECT * FROM test_data",
-    ...     connection=async_engine,
-    ... )  # doctest: +SKIP
-
-    Load data from an `AsyncSurrealDB` client connection object; note that both the "ws"
-    and "http" protocols are supported, as is the synchronous `SurrealDB` client. The
-    async loop can be run with standard `asyncio` or with `uvloop`:
-
-    >>> import asyncio  # (or uvloop)
-    >>> async def surreal_query_to_frame(query: str, url: str):
-    ...     async with AsyncSurrealDB(url) as client:
-    ...         await client.use(namespace="test", database="test")
-    ...         return pl.read_database(query=query, connection=client)
-    >>> df = asyncio.run(
-    ...     surreal_query_to_frame(
-    ...         query="SELECT * FROM test",
-    ...         url="http://localhost:8000",
-    ...     )
-    ... )  # doctest: +SKIP
-
-    """  # noqa: W505
-    if isinstance(connection, str):
-        # check for odbc connection string
-        if re.search(r"\bdriver\s*=\s*{[^}]+?}", connection, re.IGNORECASE):
-            _ = import_optional(
-                module_name="arrow_odbc",
-                err_prefix="use of ODBC connection string requires the",
-                err_suffix="package",
-            )
-            connection = ODBCCursorProxy(connection)
-        elif "://" in connection:
-            # otherwise looks like a mistaken call to read_database_uri
-            msg = "string URI is invalid here; call `read_database_uri` instead"
-            raise ValueError(msg)
-        else:
-            msg = "unable to identify string connection as valid ODBC (no driver)"
-            raise ValueError(msg)
-
-    # return frame from arbitrary connections using the executor abstraction
-    with ConnectionExecutor(connection) as cx:
-        return cx.execute(
-            query=query,
-            options=execute_options,
-        ).to_polars(
-            batch_size=batch_size,
-            iter_batches=iter_batches,
-            schema_overrides=schema_overrides,
-            infer_schema_length=infer_schema_length,
-        )
-
-
-@overload
-def read_database_uri(
-    query: str,
-    uri: str,
-    *,
-    partition_on: str | None = None,
-    partition_range: tuple[int, int] | None = None,
-    partition_num: int | None = None,
-    protocol: str | None = None,
-    engine: Literal["adbc"],
-    schema_overrides: SchemaDict | None = None,
-    execute_options: dict[str, Any] | None = None,
-    pre_execution_query: str | list[str] | None = None,
-) -> DataFrame: ...
-
-
-@overload
-def read_database_uri(
-    query: list[str] | str,
-    uri: str,
-    *,
-    partition_on: str | None = None,
-    partition_range: tuple[int, int] | None = None,
-    partition_num: int | None = None,
-    protocol: str | None = None,
-    engine: Literal["connectorx"] | None = None,
-    schema_overrides: SchemaDict | None = None,
-    execute_options: None = None,
-    pre_execution_query: str | list[str] | None = None,
-) -> DataFrame: ...
-
-
-@overload
-def read_database_uri(
-    query: str,
-    uri: str,
-    *,
-    partition_on: str | None = None,
-    partition_range: tuple[int, int] | None = None,
-    partition_num: int | None = None,
-    protocol: str | None = None,
-    engine: DbReadEngine | None = None,
-    schema_overrides: None = None,
-    execute_options: dict[str, Any] | None = None,
-    pre_execution_query: str | list[str] | None = None,
-) -> DataFrame: ...
-
-
-def read_database_uri(
-    query: list[str] | str,
-    uri: str,
-    *,
-    partition_on: str | None = None,
-    partition_range: tuple[int, int] | None = None,
-    partition_num: int | None = None,
-    protocol: str | None = None,
-    engine: DbReadEngine | None = None,
-    schema_overrides: SchemaDict | None = None,
-    execute_options: dict[str, Any] | None = None,
-    pre_execution_query: str | list[str] | None = None,
-) -> DataFrame:
-    """
-    Read the results of a SQL query into a DataFrame, given a URI.
-
-    Parameters
-    ----------
-    query
-        Raw SQL query (or queries).
-    uri
-        A connectorx or ADBC connection URI string that starts with the backend's
-        driver name, for example:
-
-        * "postgresql://user:pass@server:port/database"
-        * "snowflake://user:pass@account/database/schema?warehouse=warehouse&role=role"
-
-        The caller is responsible for escaping any special characters in the string,
-        which will be passed "as-is" to the underlying engine (this is most often
-        required when coming across special characters in the password).
-    partition_on
-        The column on which to partition the result (connectorx).
-    partition_range
-        The value range of the partition column (connectorx).
-    partition_num
-        How many partitions to generate (connectorx).
-    protocol
-        Backend-specific transfer protocol directive (connectorx); see connectorx
-        documentation for more details.
-    engine : {'connectorx', 'adbc'}
-        Selects the engine used for reading the database (defaulting to connectorx):
-
-        * `'connectorx'`
-          Supports a range of databases, such as PostgreSQL, Redshift, MySQL, MariaDB,
-          Clickhouse, Oracle, BigQuery, SQL Server, and so on. For an up-to-date list
-          please see the connectorx docs:
-          https://github.com/sfu-db/connector-x#supported-sources--destinations
-        * `'adbc'`
-          Currently there is limited support for this engine, with a relatively small
-          number of drivers available, most of which are still in development. For
-          an up-to-date list of drivers please see the ADBC docs:
-          https://arrow.apache.org/adbc/
-    schema_overrides
-        A dictionary mapping column names to dtypes, used to override the schema
-        given in the data returned by the query.
-    execute_options
-        These options will be passed to the underlying query execution method as
-        kwargs. Note that connectorx does not support this parameter and ADBC currently
-        only supports positional 'qmark' style parameterization.
-    pre_execution_query
-        SQL query or list of SQL queries executed before the main query (connectorx>=0.4.2).
-        Can be used to set runtime configurations using SET statements.
-        Only applicable for Postgres and MySQL sources.
-        Only applicable with the connectorx engine.
-
-        .. warning::
-            This functionality is considered **unstable**. It may be changed
-            at any point without it being considered a breaking change.
-
-    Notes
-    -----
-    For `connectorx`, ensure that you have `connectorx>=0.3.2`. The documentation
-    is available `here <https://sfu-db.github.io/connector-x/intro.html>`_.
-
-    For `adbc` you will need to have installed `pyarrow` and the ADBC driver associated
-    with the backend you are connecting to, eg: `adbc-driver-postgresql`.
-
-    If your password contains special characters, you will need to escape them.
-    This will usually require the use of a URL-escaping function, for example:
-
-    >>> from urllib.parse import quote, quote_plus
-    >>> quote_plus("pass word?")
-    'pass+word%3F'
-    >>> quote("pass word?")
-    'pass%20word%3F'
-
-    See Also
-    --------
-    read_database : Create a DataFrame from a SQL query using a connection object.
-
-    Examples
-    --------
-    Create a DataFrame from a SQL query using a single thread:
-
-    >>> uri = "postgresql://username:password@server:port/database"
-    >>> query = "SELECT * FROM lineitem"
-    >>> pl.read_database_uri(query, uri)  # doctest: +SKIP
-
-    Create a DataFrame in parallel using 10 threads by automatically partitioning
-    the provided SQL on the partition column:
-
-    >>> uri = "postgresql://username:password@server:port/database"
-    >>> query = "SELECT * FROM lineitem"
-    >>> pl.read_database_uri(
-    ...     query,
-    ...     uri,
-    ...     partition_on="partition_col",
-    ...     partition_num=10,
-    ...     engine="connectorx",
-    ... )  # doctest: +SKIP
-
-    Create a DataFrame in parallel using 2 threads by explicitly providing two
-    SQL queries:
-
-    >>> uri = "postgresql://username:password@server:port/database"
-    >>> queries = [
-    ...     "SELECT * FROM lineitem WHERE partition_col <= 10",
-    ...     "SELECT * FROM lineitem WHERE partition_col > 10",
-    ... ]
-    >>> pl.read_database_uri(queries, uri, engine="connectorx")  # doctest: +SKIP
-
-    Read data from Snowflake using the ADBC driver:
-
-    >>> df = pl.read_database_uri(
-    ...     "SELECT * FROM test_table",
-    ...     "snowflake://user:pass@company-org/testdb/public?warehouse=test&role=myrole",
-    ...     engine="adbc",
-    ... )  # doctest: +SKIP
-
-    Pass a single parameter via `execute_options` into a query using the ADBC driver:
-
-    >>> df = pl.read_database_uri(
-    ...     "SELECT * FROM employees WHERE hourly_rate > ?",
-    ...     "sqlite:///:memory:",
-    ...     engine="adbc",
-    ...     execute_options={"parameters": (30,)},
-    ... )  # doctest: +SKIP
-
-    Or pass multiple parameters:
-
-    >>> df = pl.read_database_uri(
-    ...     "SELECT * FROM employees WHERE hourly_rate BETWEEN ? AND ?",
-    ...     "sqlite:///:memory:",
-    ...     engine="adbc",
-    ...     execute_options={"parameters": (40, 20)},
-    ... )  # doctest: +SKIP
-    """
-    from polars.io.database._utils import _read_sql_adbc, _read_sql_connectorx
-
-    if not isinstance(uri, str):
-        msg = f"expected connection to be a URI string; found {qualified_type_name(uri)!r}"
-        raise TypeError(msg)
-    elif engine is None:
-        engine = "connectorx"
-
-    if engine == "connectorx":
-        if execute_options:
-            msg = "the 'connectorx' engine does not support use of `execute_options`"
-            raise ValueError(msg)
-        if pre_execution_query:
-            issue_unstable_warning(
-                "the 'pre-execution-query' parameter is considered unstable."
-            )
-        return _read_sql_connectorx(
-            query,
-            connection_uri=uri,
-            partition_on=partition_on,
-            partition_range=partition_range,
-            partition_num=partition_num,
-            protocol=protocol,
-            schema_overrides=schema_overrides,
-            pre_execution_query=pre_execution_query,
-        )
-    elif engine == "adbc":
-        if not isinstance(query, str):
-            msg = "only a single SQL query string is accepted for adbc"
-            raise ValueError(msg)
-        if pre_execution_query:
-            msg = "the 'adbc' engine does not support use of `pre_execution_query`"
-            raise ValueError(msg)
-        return _read_sql_adbc(
-            query,
-            connection_uri=uri,
-            schema_overrides=schema_overrides,
-            execute_options=execute_options,
-        )
-    else:
-        msg = f"engine must be one of {{'connectorx', 'adbc'}}, got {engine!r}"
-        raise ValueError(msg)
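
The string-connection dispatch near the end of the removed `read_database` hinges on the ODBC-detection regex visible in the hunk above. A small self-contained check of that logic, using illustrative connection strings (the samples are assumptions, not taken from the package):

# Sketch: exercise the ODBC-detection regex from the removed read_database.
# The sample connection strings below are assumptions for illustration only.
import re

ODBC_PATTERN = r"\bdriver\s*=\s*{[^}]+?}"  # pattern copied from the hunk above

samples = [
    "Driver={PostgreSQL};Server=localhost;Port=5432;Database=test;Uid=usr;Pwd=",
    "postgresql://user:pass@server:5432/database",
    "no-driver-and-no-scheme",
]

for conn in samples:
    if re.search(ODBC_PATTERN, conn, re.IGNORECASE):
        print("ODBC string -> handled via arrow-odbc:", conn)
    elif "://" in conn:
        print("URI -> rejected; read_database_uri expected:", conn)
    else:
        print("unrecognised string connection -> rejected:", conn)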