confluent-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confluent_sql/__init__.py +64 -0
- confluent_sql/__version__.py +10 -0
- confluent_sql/changelog_compressor.py +603 -0
- confluent_sql/connection.py +1007 -0
- confluent_sql/cursor.py +804 -0
- confluent_sql/exceptions.py +209 -0
- confluent_sql/execution_mode.py +34 -0
- confluent_sql/result_readers.py +663 -0
- confluent_sql/statement.py +566 -0
- confluent_sql/types.py +1606 -0
- confluent_sql-0.1.0.dist-info/METADATA +214 -0
- confluent_sql-0.1.0.dist-info/RECORD +14 -0
- confluent_sql-0.1.0.dist-info/WHEEL +4 -0
- confluent_sql-0.1.0.dist-info/licenses/LICENSE.txt +203 -0
|
@@ -0,0 +1,1007 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Connection module for Confluent SQL DB-API driver.
|
|
3
|
+
|
|
4
|
+
This module provides the connect function and Connection class for establishing
|
|
5
|
+
connections to Confluent SQL services.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import uuid
|
|
12
|
+
import warnings
|
|
13
|
+
from collections import namedtuple
|
|
14
|
+
from collections.abc import Generator
|
|
15
|
+
from contextlib import contextmanager
|
|
16
|
+
from dataclasses import fields, is_dataclass
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from .__version__ import VERSION
|
|
22
|
+
from .cursor import Cursor
|
|
23
|
+
from .exceptions import InterfaceError, OperationalError, StatementDeletedError
|
|
24
|
+
from .execution_mode import ExecutionMode
|
|
25
|
+
from .statement import LABEL_PREFIX as STATEMENT_LABEL_PREFIX
|
|
26
|
+
from .statement import ChangelogRow, Statement
|
|
27
|
+
from .types import RowPythonTypes
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def connect(  # noqa: PLR0913
    flink_api_key: str,
    flink_api_secret: str,
    environment: str,
    compute_pool_id: str,
    organization_id: str,
    cloud_provider: str,
    cloud_region: str,
    api_key: str | None = None,
    api_secret: str | None = None,
    dbname: str | None = None,
    result_page_fetch_pause_millis: int = 100,
    http_user_agent: str | None = None,
) -> Connection:
    """
    Validate the connection settings and build a Connection from them.

    Args:
        flink_api_key: Flink API key (required).
        flink_api_secret: Flink API secret (required).
        environment: Environment ID (required).
        compute_pool_id: Compute pool ID for SQL execution (required).
        organization_id: Organization ID (required).
        cloud_provider: Cloud provider, e.g. "aws", "gcp", "azure" (required).
        cloud_region: Cloud region, e.g. "us-east-2", "us-west-2" (required).
        api_key: Confluent Cloud API key for general Confluent Cloud
            resources (optional).
        api_secret: Confluent Cloud API secret (optional).
        dbname: Name of the database to use (optional).
        result_page_fetch_pause_millis: Maximum milliseconds to wait between
            fetches of result pages for one statement. Defaults to 100. It
            keeps result consumers from hammering the statement results API
            while no rows are available yet (running streaming query, or a
            snapshot query whose first page is not ready). If at least this
            much time has already passed since the previous fetch for the
            statement, no delay is added.
        http_user_agent: User-Agent header value for HTTP requests, a string
            of 1-100 characters. When None, the Connection falls back to
            "Confluent-SQL-Dbapi/v<version> (https://confluent.io; support@confluent.io)".

    Returns:
        A Connection object representing the database connection.

    Raises:
        InterfaceError: If any required setting is empty. The Connection
            constructor raises the same error for a negative pause value or
            an invalid user agent.
    """
    # Checked in declaration order so the first missing setting decides
    # which message the caller sees.
    required_settings = (
        (environment, "Environment ID is required"),
        (compute_pool_id, "Compute pool ID is required"),
        (organization_id, "Organization ID is required"),
        (cloud_provider, "Cloud provider is required"),
        (cloud_region, "Cloud region is required"),
    )
    for setting, complaint in required_settings:
        if not setting:
            raise InterfaceError(complaint)

    # The key and the secret are only useful as a pair.
    if not (flink_api_key and flink_api_secret):
        raise InterfaceError("Flink API key and secret are required")

    return Connection(
        flink_api_key,
        flink_api_secret,
        environment,
        compute_pool_id,
        organization_id,
        cloud_provider,
        cloud_region,
        api_key=api_key,
        api_secret=api_secret,
        dbname=dbname,
        statement_results_page_fetch_pause_millis=result_page_fetch_pause_millis,
        http_user_agent=http_user_agent,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class Connection:
|
|
117
|
+
"""
|
|
118
|
+
A connection to a Confluent SQL service.
|
|
119
|
+
|
|
120
|
+
This class represents a connection to a Confluent SQL service and provides
|
|
121
|
+
methods for creating cursors and managing the connection lifecycle.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
DEFAULT_USER_AGENT = (
|
|
125
|
+
f"Confluent-SQL-Dbapi/v{VERSION} (https://confluent.io; support@confluent.io)"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
environment: str
|
|
129
|
+
organization_id: str
|
|
130
|
+
compute_pool_id: str
|
|
131
|
+
api_key: str | None
|
|
132
|
+
api_secret: str | None
|
|
133
|
+
host: str | None
|
|
134
|
+
statement_results_page_fetch_pause_secs: float
|
|
135
|
+
"""Maximum seconds to wait between fetching pages of statement
|
|
136
|
+
results (per statement). Prevents tight loops of requests to the
|
|
137
|
+
statement results API when consuming results for a statement, especially when no results are
|
|
138
|
+
currently available but more may be forthcoming, such as when consuming results from
|
|
139
|
+
a running streaming query, or prior to when the first page of results is ready for
|
|
140
|
+
a snapshot query.
|
|
141
|
+
|
|
142
|
+
If it has already been at least this long since the most recent fetch of results for the
|
|
143
|
+
statement, then no delay will happen.
|
|
144
|
+
|
|
145
|
+
Referenced by the result reader when fetching pages of results for individual
|
|
146
|
+
statements.
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
_closed: bool
|
|
150
|
+
_dbname: str | None
|
|
151
|
+
_client: httpx.Client
|
|
152
|
+
_http_user_agent: str
|
|
153
|
+
|
|
154
|
+
_row_type_registry: RowTypeRegistry
|
|
155
|
+
"""Registry for user-defined row types, see register_row_type()."""
|
|
156
|
+
|
|
157
|
+
_snapshot_warning_issued: bool
|
|
158
|
+
"""Internal flag to track whether the snapshot query early access warning has been issued.
|
|
159
|
+
Remove after snapshot queries reach open preview (expected May 2026)."""
|
|
160
|
+
|
|
161
|
+
def __init__(  # noqa: PLR0913
    self,
    flink_api_key: str,
    flink_api_secret: str,
    environment: str,
    compute_pool_id: str,
    organization_id: str,
    cloud_provider: str,
    cloud_region: str,
    api_key: str | None = None,
    api_secret: str | None = None,
    host: str | None = None,
    dbname: str | None = None,
    statement_results_page_fetch_pause_millis: int = 100,
    http_user_agent: str | None = None,
):
    """
    Set up connection state and the HTTP client used for all API calls.

    Args:
        flink_api_key: Flink API key, used as the basic-auth username.
        flink_api_secret: Flink API secret, used as the basic-auth password.
        environment: Environment ID.
        compute_pool_id: Compute pool ID for SQL execution.
        organization_id: Organization ID.
        cloud_provider: Cloud provider; part of the default host name.
        cloud_region: Cloud region (e.g., "us-east-2", "us-west-2"); part of
            the default host name.
        api_key: Confluent Cloud API key for general Confluent Cloud
            resources (optional).
        api_secret: Confluent Cloud API secret for general Confluent Cloud
            resources (optional).
        host: Base URL of the API (optional). When None it is derived as
            "https://flink.<cloud_region>.<cloud_provider>.confluent.cloud".
        dbname: Name of the database to use (optional).
        statement_results_page_fetch_pause_millis: Milliseconds to possibly
            wait between fetches of result pages for a statement. Defaults
            to 100 and is stored in seconds on
            `statement_results_page_fetch_pause_secs`.
        http_user_agent: User-Agent header for HTTP requests, 1-100
            characters. Defaults to DEFAULT_USER_AGENT when None.

    Raises:
        InterfaceError: If the pause value is negative, or if the user agent
            is rejected by the `http_user_agent` setter.
    """
    self.environment = environment
    self.compute_pool_id = compute_pool_id
    self.organization_id = organization_id
    self.api_key = api_key
    self.api_secret = api_secret
    self.host = host

    if statement_results_page_fetch_pause_millis < 0:
        raise InterfaceError("result_page_fetch_pause_millis must be non-negative")

    # Kept in seconds; read by the cursor / result reader when paging
    # through the results of an individual statement.
    pause_secs = statement_results_page_fetch_pause_millis / 1000.0
    self.statement_results_page_fetch_pause_secs = pause_secs

    # Internal state
    self._closed = False
    self._dbname = dbname

    # Goes through the property setter, which validates the value. The
    # client does not exist yet, so the setter only stores the string.
    if http_user_agent is None:
        self.http_user_agent = self.DEFAULT_USER_AGENT
    else:
        self.http_user_agent = http_user_agent

    # Derive the regional Flink endpoint unless the caller supplied a host.
    if self.host is None:
        self.host = f"https://flink.{cloud_region}.{cloud_provider}.confluent.cloud"
    base_url = f"{self.host}/sql/v1/organizations/{organization_id}/environments/{environment}"

    # One shared httpx client carries the credentials and default headers.
    request_headers = {
        "Content-Type": "application/json",
        "User-Agent": self._http_user_agent,
    }
    self._client = httpx.Client(
        auth=httpx.BasicAuth(username=flink_api_key, password=flink_api_secret),
        base_url=base_url,
        headers=request_headers,
    )

    self._row_type_registry = RowTypeRegistry()

    # TODO: remove after snapshot queries reach open preview (May 2026)
    self._snapshot_warning_issued = False
|
|
245
|
+
|
|
246
|
+
def close(self) -> None:
    """
    Close the connection and its underlying HTTP client.

    Calling this on an already closed connection is a no-op apart from an
    informational log message.
    """
    if self._closed:
        logger.info("Trying to close a closed connection, ignoring")
        return

    # Flip the flag first so the connection reads as closed even if closing
    # the client raises.
    self._closed = True
    self._client.close()
|
|
255
|
+
|
|
256
|
+
def cursor(self, *, as_dict: bool = False, mode=ExecutionMode.SNAPSHOT) -> Cursor:
    """
    Create a new cursor for executing statements.

    By default the cursor runs snapshot (bounded) queries that return
    point-in-time results. Snapshot results come from a consistent point in
    time, form a bounded, append-only stream, and are only produced once the
    query has finished consuming all source data as of its start time.

    Streaming queries return results as the running query produces them.
    Whether those results are append-only depends on the query: a query that
    only filters source tables (Kafka topics) is append-only, while one that
    aggregates or joins is not, because earlier results may be updated as
    more data is processed. Rows of a non-append-only query carry a
    changelog operation ('op' field) saying whether the row is an insertion,
    update, or deletion. So mode=ExecutionMode.STREAMING_QUERY always starts
    a streaming query, but changelog operations only appear when the
    submitted query is not append-only.

    See the Cursor class documentation for how fetching and iteration differ
    between snapshot and streaming queries.

    Return Type Matrix
    ------------------
    1. **Append-only queries + as_dict=False** (default):
       tuples, e.g. `("val1", "val2", ...)` - the standard DB-API format.

    2. **Append-only queries + as_dict=True**:
       dicts keyed by column name, e.g. `{"col1": "val1", "col2": "val2"}`.

    3. **Changelog queries + as_dict=False** (streaming, non-append-only):
       changelog row namedtuples whose `op` is the operation
       (INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE) and whose `row` is a tuple.

    4. **Changelog queries + as_dict=True** (streaming, non-append-only):
       changelog row namedtuples whose `row` is a dict keyed by column name.

    Args:
        as_dict: If True, return row data as dictionaries with column names
            as keys. If False (default), return row data as tuples.
        mode: The execution mode for the cursor. Defaults to SNAPSHOT for
            bounded queries. Use STREAMING_QUERY for continuous/unbounded
            queries.

    Returns:
        A new Cursor object associated with this connection.

    Raises:
        InterfaceError: If the connection is closed.

    Examples:
        # Snapshot query returning tuples
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users")
        assert not cursor.is_streaming
        row = cursor.fetchone()  # ("Alice", 25), or None when exhausted

        # Snapshot query returning dicts
        cursor = conn.cursor(as_dict=True)
        cursor.execute("SELECT * FROM users")
        row = cursor.fetchone()  # {"name": "Alice", "age": 25} or None

        # Streaming append-only query
        cursor = conn.cursor(mode=ExecutionMode.STREAMING_QUERY)
        cursor.execute("SELECT user_id FROM orders")
        assert not cursor.returns_changelog  # only known after execute()
        while cursor.may_have_results:
            row = cursor.fetchone()  # None means "nothing available yet"
            if row is not None:
                ...

        # Streaming changelog query
        cursor = conn.cursor(mode=ExecutionMode.STREAMING_QUERY)
        cursor.execute("SELECT user_id, count(*) from orders group by user_id")
        assert cursor.returns_changelog
        while cursor.may_have_results:
            row = cursor.fetchone()  # changelog row, or None if no data yet
            if row is not None:
                ...
    """
    if self._closed:
        raise InterfaceError("Connection is closed")

    # TODO: remove after snapshot queries reach open preview (May 2026)
    first_snapshot_cursor = mode.is_snapshot and not self._snapshot_warning_issued
    if first_snapshot_cursor:
        # Warn only once per connection.
        self._snapshot_warning_issued = True
        early_access_notice = (
            "Snapshot queries on Confluent Cloud Flink SQL are currently in "
            "Early Access and may be subject to change."
        )
        # stacklevel=2 attributes the warning to the caller of cursor().
        warnings.warn(early_access_notice, stacklevel=2)

    return Cursor(self, as_dict=as_dict, execution_mode=mode)
|
|
368
|
+
|
|
369
|
+
def streaming_cursor(self, *, as_dict: bool = False) -> Cursor:
    """
    Create a cursor configured for streaming query execution.

    Convenience method equivalent to:
        `cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)`

    For streaming queries the row type depends on whether the query is
    append-only:
    - Append-only: tuples or dicts, depending on as_dict
    - Non-append-only: changelog row namedtuples holding the operation and
      the row data

    See the cursor() method documentation for the full return type details.

    Args:
        as_dict: If True, return row data as dictionaries. If False, as tuples.

    Returns:
        A new Cursor configured for streaming query execution.

    Raises:
        InterfaceError: If the connection is closed.
    """
    # Same guard as cursor(): a closed connection must not hand out new
    # cursors, whichever factory method is used.
    if self._closed:
        raise InterfaceError("Connection is closed")

    return Cursor(self, as_dict=as_dict, execution_mode=ExecutionMode.STREAMING_QUERY)
|
|
389
|
+
|
|
390
|
+
@contextmanager
def closing_cursor(
    self, *, as_dict: bool = False, mode: ExecutionMode = ExecutionMode.SNAPSHOT
) -> Generator[Cursor, None, None]:
    """
    Context manager that creates a cursor and closes it on exit.

    The cursor is obtained from cursor() with the same arguments, so the
    return type variations described there (tuples, dicts, or changelog
    rows, depending on as_dict and on whether the query is append-only)
    apply here too.

    Args:
        as_dict: If True, fetch results as dictionaries, otherwise as tuples.
        mode: The execution mode for the cursor. Defaults to SNAPSHOT.

    Yields:
        A new Cursor object associated with this connection.

    Raises:
        InterfaceError: If the connection is closed.

    Example:
        with conn.closing_cursor(as_dict=True) as cursor:
            cursor.execute("SELECT * FROM users")
            for row in cursor:
                print(row)  # Prints dicts with column names
        # cursor is automatically closed after the with block
    """
    managed_cursor = self.cursor(as_dict=as_dict, mode=mode)
    try:
        yield managed_cursor
    finally:
        # Runs whether the with-body finished normally or raised.
        managed_cursor.close()
|
|
423
|
+
|
|
424
|
+
@contextmanager
def closing_streaming_cursor(self, *, as_dict: bool = False) -> Generator[Cursor, None, None]:
    """
    Context manager that creates a streaming cursor and closes it on exit.

    Convenience method equivalent to:
        closing_cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)

    The cursor processes continuous data from Flink SQL; streaming cursors
    return data as it arrives without blocking or collecting all results
    into memory.

    Statement Lifecycle Management:
        On exit the cursor is closed via cursor.close(), which makes a
        best-effort attempt to delete statements that are already in a
        terminal phase (COMPLETED/FAILED/STOPPED). Deletion errors are
        logged and suppressed, so server-side cleanup is not strictly
        guaranteed. Streaming queries that are still RUNNING on the server
        when the context manager exits are NOT automatically stopped or
        deleted server-side.

        To explicitly stop a RUNNING streaming statement, call
        cursor.delete_statement() or connection.delete_statement(statement_id)
        before exiting the context manager.

    Args:
        as_dict: If True, fetch results as dictionaries, otherwise as tuples.

    Yields:
        A new streaming Cursor object associated with this connection.

    Raises:
        InterfaceError: If the connection is closed.

    Example:
        with conn.closing_streaming_cursor(as_dict=True) as cursor:
            cursor.execute("SELECT * FROM orders WHERE amount > %s", (1000,))
            while cursor.may_have_results:
                rows = cursor.fetchmany(10)
                if rows:
                    for row in rows:
                        process(row)
                else:
                    time.sleep(0.1)
        # cursor is automatically closed after the with block
    """
    # Delegate creation and cleanup to closing_cursor().
    streaming_ctx = self.closing_cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)
    with streaming_ctx as streaming_cur:
        yield streaming_cur
|
|
472
|
+
|
|
473
|
+
def execute_snapshot_ddl(
    self,
    statement_text: str,
    parameters: tuple | list | None = None,
    timeout: int = 3000,
    statement_name: str | None = None,
    statement_label: str | None = None,
) -> Statement:
    """Execute bounded DDL that completes after consuming snapshot data.

    Use for statements like:
    - CREATE TABLE (not AS SELECT)
    - DROP TABLE
    - ALTER TABLE
    - CREATE VIEW
    - DROP VIEW
    - CREATE TABLE foo AS SELECT ... (snapshot mode, where the SELECT
      portion completes with snapshot behavior)

    The statement runs on a temporary SNAPSHOT_DDL cursor that is closed
    before this method returns.

    Args:
        statement_text: The DDL statement to execute.
        parameters: Optional statement parameters.
        timeout: Maximum time to wait for completion (documented as seconds;
            passed through unchanged to Cursor.execute).
        statement_name: Optional name for the statement.
        statement_label: Optional label for the statement. Labels can be
            used to group and manage related statements. The label is
            prefixed with "user.confluent.io/" when stored, but only the
            label value itself needs to be given (e.g., "my-ddl-batch").

    Returns:
        Statement for managing the statement lifecycle.

    Raises:
        OperationalError: If statement fails or times out.
        ProgrammingError: If statement is invalid.
    """
    with self.closing_cursor(mode=ExecutionMode.SNAPSHOT_DDL) as ddl_cursor:
        ddl_cursor.execute(
            statement_text,
            parameters,
            timeout=timeout,
            statement_name=statement_name,
            statement_label=statement_label,
        )
        # Read the last version of the statement while the cursor is still
        # open; the cursor is closed right after this block.
        final_statement = ddl_cursor.statement

    return final_statement
|
|
521
|
+
|
|
522
|
+
def execute_streaming_ddl(
    self,
    statement_text: str,
    parameters: tuple | list | None = None,
    timeout: int = 3000,
    statement_name: str | None = None,
    statement_label: str | None = None,
) -> Statement:
    """Execute unbounded DDL that starts a streaming job.

    Use for statements like:
    - CREATE TABLE ... AS SELECT ... (streaming mode, where the SELECT
      portion is unbounded)
    - CREATE MATERIALIZED TABLE ... (streaming mode, where the table is
      populated by an unbounded streaming job but the overall CREATE
      statement itself completes once the population job is started)

    The statement runs on a temporary STREAMING_DDL cursor that is closed
    before this method returns.

    Args:
        statement_text: The DDL statement to execute.
        parameters: Optional statement parameters.
        timeout: Maximum time to wait for completion (documented as seconds;
            passed through unchanged to Cursor.execute).
        statement_name: Optional name for the statement.
        statement_label: Optional label for the statement. Labels can be
            used to group and manage related statements. The label is
            prefixed with "user.confluent.io/" when stored, but only the
            label value itself needs to be given (e.g., "streaming-jobs").

    Returns:
        Statement for any further management of the statement lifecycle.
    """
    with self.closing_cursor(mode=ExecutionMode.STREAMING_DDL) as ddl_cursor:
        ddl_cursor.execute(
            statement_text,
            parameters,
            timeout=timeout,
            statement_name=statement_name,
            statement_label=statement_label,
        )
        # Read the statement while the cursor is still open; the cursor is
        # closed right after this block.
        started_statement = ddl_cursor.statement

    return started_statement
|
|
561
|
+
|
|
562
|
+
def list_statements(self, *, label: str, page_size: int = 100) -> list[Statement]:
    """Return a list of Statement objects for statements with the given label.

    Retrieves every statement created with the specified label, which is
    useful for managing groups of related statements. Filtering happens on
    the server through the `label_selector` query parameter, and all result
    pages are followed automatically.

    Args:
        label: The label to filter statements by. Either:
            - just the label value (e.g., "my-batch-job"); the
              "user.confluent.io/" prefix is added automatically, or
            - the full label including the prefix
              (e.g., "user.confluent.io/my-batch-job").
        page_size: Number of statements to fetch per API request
            (default: 100).

    Returns:
        A list of Statement objects that have the specified label; an empty
        list if no statement matches.

    Raises:
        OperationalError: If the API request fails (documented contract;
            presumably raised by the request helper).

    Example:
        # Submit statements with a label
        cursor.execute("SELECT * FROM users", statement_label="daily-report")
        cursor.execute("SELECT * FROM orders", statement_label="daily-report")

        # Later, retrieve all statements with that label
        statements = connection.list_statements(label="daily-report")

        # Delete all statements with the label
        for stmt in statements:
            connection.delete_statement(stmt)
    """
    # The API expects the full label key including the prefix, but callers
    # may pass just the end-user portion of the label.
    if label.startswith(STATEMENT_LABEL_PREFIX):
        full_label = label
    else:
        full_label = f"{STATEMENT_LABEL_PREFIX}{label}"

    parameters: dict[str, Any] = {
        "label_selector": f"{full_label}=true",
        "page_size": page_size,
    }
    statements: list[Statement] = []

    while True:
        resp_json = self._request("/statements", params=parameters).json()

        # `or []` / `or {}` also cover an explicit JSON null, which
        # dict.get's default argument would not.
        statements.extend(
            Statement.from_response(self, s) for s in resp_json.get("data") or []
        )

        # The 'next' link in the response metadata is a full URL; only the
        # page token extracted from it is needed for the next request.
        next_link = (resp_json.get("metadata") or {}).get("next")
        next_page_token = self._get_next_page_token(next_link)

        # Any falsy token (None or empty) ends pagination. Continuing
        # without a usable token would refetch the same page forever.
        if not next_page_token:
            break
        parameters["page_token"] = next_page_token

    return statements
|
|
627
|
+
|
|
628
|
+
def delete_statement(self, statement: str | Statement) -> None:
    """
    Delete a statement, given its name or its Statement object.

    In Flink SQL, executed statements (especially streaming ones) create
    resources that linger within Confluent Cloud until they are explicitly
    deleted (or have stopped and enough time has passed for automatic
    cleanup).

    Deleting a RUNNING statement will stop it first.

    Args:
        statement: The name of the statement to delete, or the Statement
            object. A Statement object that is already marked deleted is
            ignored. A Statement object for a still running statement is
            deleted, which stops the statement.

    Raises:
        TypeError: If `statement` is neither a str nor a Statement.
        OperationalError: If the server answers the DELETE with an error
            status other than 404.
    """
    # Resolve the argument to a statement name.
    if isinstance(statement, Statement):
        if statement.is_deleted:
            logger.info(f"Statement {statement.name} is already deleted, ignoring")
            return
        statement_name = statement.name
    elif isinstance(statement, str):
        statement_name = statement
    else:
        raise TypeError(
            "Statement to delete must be specified by name or Statement object, "
            f"got {type(statement)}"
        )

    logger.info(f"Deleting statement {statement_name}")
    # Status handling is done below so that a 404 can be tolerated.
    response = self._request(
        f"/statements/{statement_name}", method="DELETE", raise_for_status=False
    )
    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        if e.response.status_code != 404:
            raise OperationalError("Error deleting statement") from e
        # A 404 means there is nothing left to delete.
        logger.info(f"Statement '{statement_name}' not found while deleting, ignoring")

    # Mark the Statement object as deleted in case the caller keeps using
    # its reference.
    if isinstance(statement, Statement):
        statement.set_deleted()
|
|
675
|
+
|
|
676
|
+
@property
def is_closed(self) -> bool:
    """
    Report whether this connection has been closed.

    Returns:
        The current value of the connection's internal closed flag:
        True once the connection is closed, False otherwise.
    """
    return self._closed
|
|
685
|
+
|
|
686
|
+
@property
def http_user_agent(self) -> str:
    """
    Return the User-Agent header value stored on this connection.

    Returns:
        The current User-Agent string
    """
    return self._http_user_agent

@http_user_agent.setter
def http_user_agent(self, value: str) -> None:
    """
    Replace the User-Agent header value used by this connection.

    The new value is stored on the connection and, when the HTTP client
    already exists, also written into the client's default headers so that
    it applies to subsequent requests.

    Args:
        value: The User-Agent string to use. Must be a string whose length
            is between 1 and 100 characters (inclusive).

    Raises:
        InterfaceError: If value is not a string, is empty, or exceeds 100 characters

    Example:
        conn.http_user_agent = "my-app/1.0"
    """
    if not isinstance(value, str):
        raise InterfaceError(f"http_user_agent must be a string, got {type(value).__name__}")

    if not 1 <= len(value) <= 100:
        raise InterfaceError(
            f"http_user_agent length must be between 1 and 100 characters, got {len(value)}"
        )

    self._http_user_agent = value

    # The setter may run before the httpx client has been created; in that
    # case there are no client headers to refresh yet.
    try:
        client = self._client
    except AttributeError:
        return
    client.headers["User-Agent"] = value
|
|
727
|
+
|
|
728
|
+
def register_row_type(self, class_for_flink_row: type[RowPythonTypes]) -> None:
    """Register a user-defined namedtuple, NamedTuple, or @dataclass class to be
    used when returning deserialized ROW values.

    The call is forwarded unchanged to this connection's row type registry.
    Matching of a registered class to a ROW is by the ordered sequence of the
    class's declared field names; when no registered class matches, the
    registry creates and caches a plain namedtuple class instead.

    Args:
        class_for_flink_row: The class (not an instance) to register.
    """
    registry = self._row_type_registry
    registry.register_row_type(class_for_flink_row)
|
|
741
|
+
|
|
742
|
+
def _execute_statement(
|
|
743
|
+
self,
|
|
744
|
+
statement: str,
|
|
745
|
+
execution_mode: ExecutionMode,
|
|
746
|
+
statement_name: str | None = None,
|
|
747
|
+
statement_label: str | None = None,
|
|
748
|
+
) -> dict[str, Any]:
|
|
749
|
+
"""
|
|
750
|
+
Execute a SQL statement and return the response.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
statement: The SQL statement to execute
|
|
754
|
+
parameters: Parameters for the SQL statement (optional)
|
|
755
|
+
statement_name: Optional name for the statement (defaults to 'dbapi-{uuid}')
|
|
756
|
+
statement_label: Optional label for the statement for easier identification in
|
|
757
|
+
server logs and UIs (defaults to None).
|
|
758
|
+
|
|
759
|
+
Returns:
|
|
760
|
+
Dictionary containing the API response
|
|
761
|
+
|
|
762
|
+
Raises:
|
|
763
|
+
OperationalError: If statement execution fails
|
|
764
|
+
"""
|
|
765
|
+
|
|
766
|
+
# Create the statement payload as per Flink SQL API documentation
|
|
767
|
+
if statement_name is None:
|
|
768
|
+
statement_name = f"dbapi-{str(uuid.uuid4())}"
|
|
769
|
+
|
|
770
|
+
# Each connection uses a single environment, also
|
|
771
|
+
# called catalog, so we set the property here
|
|
772
|
+
properties = {"sql.current-catalog": self.environment}
|
|
773
|
+
|
|
774
|
+
if self._dbname is not None:
|
|
775
|
+
properties["sql.current-database"] = self._dbname
|
|
776
|
+
|
|
777
|
+
if execution_mode.is_snapshot:
|
|
778
|
+
# Ask for snapshot mode behavior -- point-in-time results.
|
|
779
|
+
properties["sql.snapshot.mode"] = "now"
|
|
780
|
+
|
|
781
|
+
payload = {
|
|
782
|
+
"name": statement_name,
|
|
783
|
+
"organization_id": self.organization_id,
|
|
784
|
+
"environment_id": self.environment,
|
|
785
|
+
"spec": {
|
|
786
|
+
"statement": statement,
|
|
787
|
+
"properties": properties,
|
|
788
|
+
"compute_pool_id": self.compute_pool_id,
|
|
789
|
+
"stopped": False,
|
|
790
|
+
},
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
if statement_label is not None:
|
|
794
|
+
# Guard against user already including the mandatory prefix.
|
|
795
|
+
if statement_label.startswith(STATEMENT_LABEL_PREFIX):
|
|
796
|
+
label_key = statement_label
|
|
797
|
+
else:
|
|
798
|
+
label_key = f"{STATEMENT_LABEL_PREFIX}{statement_label}"
|
|
799
|
+
|
|
800
|
+
payload["metadata"] = {
|
|
801
|
+
"labels": {label_key: "true"},
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
# Submit statement using the API
|
|
805
|
+
res = self._request("/statements", method="POST", json=payload)
|
|
806
|
+
return res.json()
|
|
807
|
+
|
|
808
|
+
def _get_statement(self, statement_name: str) -> dict[str, Any]:
|
|
809
|
+
"""
|
|
810
|
+
Get the current structure of a statement.
|
|
811
|
+
|
|
812
|
+
Args:
|
|
813
|
+
statement_name: The name of the statement to check
|
|
814
|
+
|
|
815
|
+
Returns:
|
|
816
|
+
Dictionary containing the statement status and details
|
|
817
|
+
|
|
818
|
+
Raises:
|
|
819
|
+
OperationalError: If status check fails
|
|
820
|
+
"""
|
|
821
|
+
return self._request(f"/statements/{statement_name}").json()
|
|
822
|
+
|
|
823
|
+
def _get_statement_results(
    self, statement_name: str, next_url: str | None
) -> tuple[list[ChangelogRow], str | None]:
    """
    Try to get a page of results for a statement.

    Args:
        statement_name: The name of the statement
        next_url: Optional full URL to fetch the next page of results from. If None, then
            the results endpoint for the statement will be used.

    Returns:
        A 2-tuple: (list of results in changelog row format, optional url to fetch next page.)
        If the next page URL is None, there are no more pages to fetch.

    Raises:
        StatementDeletedError: If the statement has been deleted (404), or the reply
            carries neither result data nor an error payload
        OperationalError: If results retrieval fails for other reasons
    """
    if next_url is None:
        next_url = f"/statements/{statement_name}/results"

    try:
        response = self._request(next_url).json()
    except OperationalError as e:
        # The request helper chains the originating HTTP status error as
        # __cause__. Prefer its real status code: matching "404" against the
        # message text misfires whenever the server's detail text merely
        # contains those digits (e.g. inside a uuid-based statement name).
        status_code = getattr(getattr(e.__cause__, "response", None), "status_code", None)
        if isinstance(status_code, int):
            statement_missing = status_code == 404
        else:
            # No usable status on the cause; keep the legacy text check.
            statement_missing = "404" in str(e)

        if statement_missing:
            raise StatementDeletedError(
                f"Statement '{statement_name}' has been deleted", statement_name
            ) from e
        raise

    # Some APIs return 200 OK with an empty or error payload instead of a
    # proper HTTP status code, so the body itself is inspected as well.
    if response is None:
        raise StatementDeletedError(
            f"Statement '{statement_name}' has been deleted", statement_name
        )

    # 'results' may be absent or an explicit JSON null; both mean "no data member".
    data_list = (response.get("results") or {}).get("data")
    if data_list is None:
        error = response.get("error")
        if error:
            # The error payload is not guaranteed to be an object.
            error_code = error.get("code") if isinstance(error, dict) else None
            if error_code == 404 or "not found" in str(error).lower():
                raise StatementDeletedError(
                    f"Statement '{statement_name}' has been deleted", statement_name
                )
            raise OperationalError(f"Error fetching results: {error}")
        # If no error but data is None, treat as deleted statement
        raise StatementDeletedError(
            f"Statement '{statement_name}' has been deleted or returned invalid response",
            statement_name,
        )

    # Promote the raw dicts to ChangelogRow namedtuples. 'op' may be omitted,
    # in which case 0 (INSERT) is assumed. If no (new) results are currently
    # available, this is an empty list.
    results: list[ChangelogRow] = [ChangelogRow(r.get("op", 0), r["row"]) for r in data_list]

    logger.info("got %s changelog rows for statement %s", len(results), statement_name)

    # An empty or missing 'next' link is normalized to None: no more pages.
    next_url = (response.get("metadata") or {}).get("next") or None

    return (results, next_url)
|
|
894
|
+
|
|
895
|
+
def _request(self, url, method="GET", raise_for_status=True, **kwargs) -> httpx.Response:
    """
    Send an HTTP request through the connection's httpx client.

    Args:
        url: URL (or path) to request
        method: HTTP method name, "GET" by default
        raise_for_status: When True, an HTTP error status is turned into an
            OperationalError; when False the response is returned as-is so the
            caller can inspect the status itself.
        **kwargs: Passed through unchanged to the client's ``request`` call

    Returns:
        The httpx response object

    Raises:
        InterfaceError: If the connection is closed
        OperationalError: If raise_for_status is set and the server replied with
            an error status. The message carries the status code and any
            'detail' texts found in the reply's 'errors' list.
    """
    if self._closed:
        raise InterfaceError("Connection is closed")

    try:
        reply = self._client.request(method, url, **kwargs)
        logger.debug("Response: %s", reply.content)
        if raise_for_status:
            reply.raise_for_status()
    except httpx.HTTPStatusError as http_err:
        # Best effort: an error body that is not JSON, or not in the expected
        # shape, must not mask the HTTP failure itself.
        try:
            reported = http_err.response.json().get("errors", [])
            details = "; ".join(item["detail"] for item in reported)
        except Exception:
            details = "no more details"

        message = f"error sending request '{http_err.response.status_code}' - {details}"
        raise OperationalError(message) from http_err
    else:
        return reply
|
|
916
|
+
|
|
917
|
+
def _get_next_page_token(self, next_url: str | None) -> str | None:
|
|
918
|
+
"""Extract the next page token from the next_url, if present."""
|
|
919
|
+
if next_url is None:
|
|
920
|
+
return None
|
|
921
|
+
|
|
922
|
+
# The next_url is expected to be a full URL with a query parameter like '?page_token=abc123'
|
|
923
|
+
# We can parse it to extract the page_token value.
|
|
924
|
+
parsed = httpx.URL(next_url)
|
|
925
|
+
page_token = parsed.params.get("page_token")
|
|
926
|
+
return page_token
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
class RowTypeRegistry:
    """Registry of the namedtuple, NamedTuple or @dataclass classes used to
    deserialize ROW values from query results.

    Classes are keyed by the ordered tuple of their field names. Users add
    their own classes through `connection.register_row_type()`; any ROW whose
    field names match a registered class is then deserialized into that class.

    When no registered class matches, a namedtuple class is generated on
    demand and cached under the same key for later lookups.
    """

    # Maps an ordered tuple of field names to the class handling that structure.
    _cache: dict[tuple[str, ...], type[RowPythonTypes]]

    def __init__(self):
        self._cache = {}

    def get_row_class(self, field_names: list[str] | tuple[str, ...]) -> type[RowPythonTypes]:
        """
        Look up the class handling ROWs with the given field names.

        A cached class (user-registered or previously generated) is returned
        when present; otherwise a namedtuple class is created, cached and returned.

        field_names: A sequence of strings (e.g., ['name', 'age'])

        Raises TypeError if field_names is not a list or tuple, or if any
        element is not a string.
        """
        if not isinstance(field_names, (list, tuple)):
            raise TypeError(
                f"field_names must be a list or tuple of strings, got {type(field_names)}"
            )

        for candidate in field_names:
            if isinstance(candidate, str):
                continue
            raise TypeError(f"All field names must be strings, got a {type(candidate)}")

        # Lists are not hashable, so the cache is keyed by the tuple form.
        key = tuple(field_names)

        try:
            return self._cache[key]
        except KeyError:
            pass

        # rename=True handles Flink columns with chars invalid in Python
        row_cls = namedtuple("Row", field_names, rename=True)
        logger.debug(
            f"Created new namedtuple class for ROW with fields: {field_names}, "
            f"resulting namedtuple fields: {row_cls._fields}"
        )  # pyright: ignore[reportAttributeAccessIssue]
        self._cache[key] = row_cls
        return row_cls

    @staticmethod
    def _declared_field_names(candidate) -> tuple[str, ...] | None:
        """Return the ordered field names of a supported class, or None if unsupported."""
        if not isinstance(candidate, type):
            # An instance (or any non-class object) is never acceptable.
            return None

        # Duck-typed namedtuple / typing.NamedTuple: subclass of tuple + has _fields
        if issubclass(candidate, tuple) and hasattr(candidate, "_fields"):
            return tuple(candidate._fields)  # pyright: ignore[reportAttributeAccessIssue]

        # Only other supported type is an @dataclass
        if is_dataclass(candidate):
            return tuple(field.name for field in fields(candidate))

        return None

    def register_row_type(self, user_type_for_row: type[RowPythonTypes]) -> None:
        """
        Store a user-provided namedtuple, typing.NamedTuple, or @dataclass class
        under the sequence of its field names, for use when deserializing ROW values.

        An existing entry for the same field names is replaced.

        Raises TypeError if the provided type is not a supported class type.
        """
        key = self._declared_field_names(user_type_for_row)

        if key is None:
            # User passed a non-supported type or an instance of something.
            raise TypeError(
                f"Expected a namedtuple, NamedTuple, or @dataclass type, got {user_type_for_row} instead"  # noqa: E501
            )

        # The user's class takes precedence for this structure from now on.
        self._cache[key] = user_type_for_row
|