confluent-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confluent_sql/__init__.py +64 -0
- confluent_sql/__version__.py +10 -0
- confluent_sql/changelog_compressor.py +603 -0
- confluent_sql/connection.py +1007 -0
- confluent_sql/cursor.py +804 -0
- confluent_sql/exceptions.py +209 -0
- confluent_sql/execution_mode.py +34 -0
- confluent_sql/result_readers.py +663 -0
- confluent_sql/statement.py +566 -0
- confluent_sql/types.py +1606 -0
- confluent_sql-0.1.0.dist-info/METADATA +214 -0
- confluent_sql-0.1.0.dist-info/RECORD +14 -0
- confluent_sql-0.1.0.dist-info/WHEEL +4 -0
- confluent_sql-0.1.0.dist-info/licenses/LICENSE.txt +203 -0
confluent_sql/cursor.py
ADDED
|
@@ -0,0 +1,804 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cursor module for Confluent SQL DB-API driver.
|
|
3
|
+
|
|
4
|
+
This module provides the Cursor class for executing SQL statements and
|
|
5
|
+
retrieving results from Confluent SQL services.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import random
|
|
12
|
+
import time
|
|
13
|
+
import warnings
|
|
14
|
+
from collections.abc import Iterator
|
|
15
|
+
from typing import TYPE_CHECKING, Any, TypeAlias
|
|
16
|
+
|
|
17
|
+
from .changelog_compressor import ChangelogCompressor, create_changelog_compressor
|
|
18
|
+
from .exceptions import (
|
|
19
|
+
ComputePoolExhaustedError,
|
|
20
|
+
InterfaceError,
|
|
21
|
+
OperationalError,
|
|
22
|
+
ProgrammingError,
|
|
23
|
+
)
|
|
24
|
+
from .execution_mode import ExecutionMode
|
|
25
|
+
from .result_readers import (
|
|
26
|
+
AppendOnlyResultReader,
|
|
27
|
+
ChangelogEventReader,
|
|
28
|
+
ChangeloggedRow,
|
|
29
|
+
FetchMetrics,
|
|
30
|
+
ResultReader,
|
|
31
|
+
ResultTupleOrDict,
|
|
32
|
+
)
|
|
33
|
+
from .statement import Statement
|
|
34
|
+
from .types import convert_statement_parameters
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from .connection import Connection
|
|
38
|
+
|
|
39
|
+
# Module-level logger, named after this module, used by the Cursor class below.
logger = logging.getLogger(__name__)

# Union of every row shape a cursor fetch or iteration can hand back.
ResultRow: TypeAlias = ResultTupleOrDict | ChangeloggedRow
"""A single row of results returned from a cursor fetch or iteration.
Can be a dict or tuple depending on cursor configuration and statement schema, or a ChangeloggedRow
containing the changelog operation and a dict or tuple if the statement is not append-only."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Cursor:
|
|
48
|
+
"""
|
|
49
|
+
A cursor for executing SQL statements and retrieving results.
|
|
50
|
+
|
|
51
|
+
This class provides methods for executing SQL statements and fetching
|
|
52
|
+
results from a Confluent SQL service connection.
|
|
53
|
+
|
|
54
|
+
Result Consumption Methods
|
|
55
|
+
--------------------------
|
|
56
|
+
Two approaches are available for consuming query results, with different
|
|
57
|
+
blocking behaviors in streaming mode:
|
|
58
|
+
|
|
59
|
+
1. **Iteration (for row in cursor):**
|
|
60
|
+
- Always blocking, waits for data to become available
|
|
61
|
+
- Suitable for consuming complete result sets
|
|
62
|
+
- Will retry fetching until data arrives or stream ends
|
|
63
|
+
|
|
64
|
+
2. **Fetch methods (fetchone/fetchmany/fetchall):**
|
|
65
|
+
- Snapshot mode: Blocking (traditional DB-API behavior)
|
|
66
|
+
- Streaming mode: Non-blocking for fetchone/fetchmany (single request max)
|
|
67
|
+
- Use cursor.may_have_results to check if more data might come
|
|
68
|
+
|
|
69
|
+
For streaming queries, choose based on your use case:
|
|
70
|
+
- Continuous consumption: Use iteration
|
|
71
|
+
- Polling/async patterns: Use fetch methods with may_have_results
|
|
72
|
+
|
|
73
|
+
Result Type Determination
|
|
74
|
+
-------------------------
|
|
75
|
+
The cursor exposes properties to help determine what type of results will be returned:
|
|
76
|
+
|
|
77
|
+
- `cursor.as_dict`: True if rows are dicts, False if tuples
|
|
78
|
+
- `cursor.execution_mode`: SNAPSHOT or STREAMING_QUERY
|
|
79
|
+
- `cursor.is_streaming`: True if in streaming mode (convenience for execution_mode check)
|
|
80
|
+
- `cursor.returns_changelog`: True if results are ChangeloggedRow namedtuples
|
|
81
|
+
- `cursor.statement.is_append_only`: True if only INSERT operations (after execute)
|
|
82
|
+
|
|
83
|
+
Example:
|
|
84
|
+
cursor.execute("SELECT * FROM orders")
|
|
85
|
+
if cursor.returns_changelog:
|
|
86
|
+
# Results are ChangeloggedRow(op=..., row=...)
|
|
87
|
+
row = cursor.fetchone()
|
|
88
|
+
if row:
|
|
89
|
+
op, data = row # Unpack operation and row data
|
|
90
|
+
else:
|
|
91
|
+
# Results are plain tuples or dicts
|
|
92
|
+
row = cursor.fetchone() # tuple or dict based on cursor.as_dict
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
def __init__(
|
|
96
|
+
self,
|
|
97
|
+
connection: Connection,
|
|
98
|
+
*,
|
|
99
|
+
as_dict: bool = False,
|
|
100
|
+
execution_mode: ExecutionMode,
|
|
101
|
+
):
|
|
102
|
+
"""
|
|
103
|
+
Initialize a new cursor.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
connection: The Connection object this cursor is associated with.
|
|
107
|
+
as_dict: If True, fetch results as dictionaries; otherwise, as tuples.
|
|
108
|
+
"""
|
|
109
|
+
self.rowcount = -1
|
|
110
|
+
self._arraysize = 1
|
|
111
|
+
|
|
112
|
+
# Cursor state
|
|
113
|
+
self._connection = connection
|
|
114
|
+
self._closed = False
|
|
115
|
+
self._next_page = None
|
|
116
|
+
self._results_as_dicts = as_dict
|
|
117
|
+
self._execution_mode = execution_mode
|
|
118
|
+
|
|
119
|
+
# Statement execution state
|
|
120
|
+
self._statement: Statement | None = None
|
|
121
|
+
self._result_reader: AppendOnlyResultReader | ChangelogEventReader | None = (
|
|
122
|
+
None
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
@property
|
|
126
|
+
def description(self) -> list[tuple] | None:
|
|
127
|
+
self._raise_if_closed()
|
|
128
|
+
|
|
129
|
+
# Required by DB-API: https://peps.python.org/pep-0249/#description
|
|
130
|
+
if self._statement is None:
|
|
131
|
+
return None
|
|
132
|
+
else:
|
|
133
|
+
return self._statement.description
|
|
134
|
+
|
|
135
|
+
@property
|
|
136
|
+
def arraysize(self) -> int:
|
|
137
|
+
"""
|
|
138
|
+
Get the number of rows to fetch in a single fetchmany() call.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
The current arraysize value (default: 1).
|
|
142
|
+
"""
|
|
143
|
+
return self._arraysize
|
|
144
|
+
|
|
145
|
+
@arraysize.setter
|
|
146
|
+
def arraysize(self, value: int) -> None:
|
|
147
|
+
"""
|
|
148
|
+
Set the number of rows to fetch in a single fetchmany() call.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
value: A positive integer representing the number of rows to fetch.
|
|
152
|
+
|
|
153
|
+
Raises:
|
|
154
|
+
InterfaceError: If value is not an integer, or is not a positive number.
|
|
155
|
+
"""
|
|
156
|
+
if not isinstance(value, int) or isinstance(value, bool):
|
|
157
|
+
raise InterfaceError(
|
|
158
|
+
f"arraysize must be a positive integer, got {type(value).__name__}"
|
|
159
|
+
)
|
|
160
|
+
if value <= 0:
|
|
161
|
+
raise InterfaceError(
|
|
162
|
+
f"arraysize must be a positive integer, got {value}"
|
|
163
|
+
)
|
|
164
|
+
self._arraysize = value
|
|
165
|
+
|
|
166
|
+
@property
|
|
167
|
+
def statement(self) -> Statement:
|
|
168
|
+
"""
|
|
169
|
+
Get the current statement associated with the cursor.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
The current Statement object.
|
|
173
|
+
|
|
174
|
+
Raises:
|
|
175
|
+
InterfaceError: If no statement has been executed yet.
|
|
176
|
+
"""
|
|
177
|
+
if self._statement is None:
|
|
178
|
+
raise InterfaceError("No statement has been executed yet.")
|
|
179
|
+
return self._statement
|
|
180
|
+
|
|
181
|
+
def execute(
|
|
182
|
+
self,
|
|
183
|
+
statement_text: str,
|
|
184
|
+
parameters: tuple | list | None = None,
|
|
185
|
+
*,
|
|
186
|
+
timeout: int = 3000,
|
|
187
|
+
statement_name: str | None = None,
|
|
188
|
+
statement_label: str | None = None,
|
|
189
|
+
) -> None:
|
|
190
|
+
"""
|
|
191
|
+
Execute a SQL statement.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
statement_text: The SQL statement to execute
|
|
195
|
+
parameters: Parameters for the SQL statement (optional)
|
|
196
|
+
timeout: Maximum time to wait for statement completion (snapshot) or initialization
|
|
197
|
+
(streaming) in seconds (default: 3000)
|
|
198
|
+
statement_name: Optional name for the statement (defaults to DB-API UUID
|
|
199
|
+
if not provided)
|
|
200
|
+
statement_label: Optional label for the statement. Labels can be used to
|
|
201
|
+
group and manage related statements. The label will be
|
|
202
|
+
prefixed with "user.confluent.io/" when stored but you only
|
|
203
|
+
need to provide the label value itself (e.g., "my-batch-job").
|
|
204
|
+
Use Connection.list_statements(label=...) to retrieve statements
|
|
205
|
+
by label.
|
|
206
|
+
|
|
207
|
+
Raises:
|
|
208
|
+
InterfaceError: If the cursor is closed
|
|
209
|
+
ProgrammingError: If the SQL statement is invalid
|
|
210
|
+
OperationalError: If the statement cannot be executed
|
|
211
|
+
"""
|
|
212
|
+
self._raise_if_closed()
|
|
213
|
+
|
|
214
|
+
if not statement_text.strip():
|
|
215
|
+
raise ProgrammingError("SQL statement cannot be empty")
|
|
216
|
+
|
|
217
|
+
# Delete any previous statement if present and in a deletable state
|
|
218
|
+
if self._statement is not None and not self._statement.is_deleted:
|
|
219
|
+
if self._statement.is_deletable:
|
|
220
|
+
self.delete_statement()
|
|
221
|
+
else:
|
|
222
|
+
warnings.warn(
|
|
223
|
+
"Executing a new statement on a cursor with an existing active"
|
|
224
|
+
f" statement. The previous statement {self._statement.name} will not be deleted"
|
|
225
|
+
" automatically.",
|
|
226
|
+
stacklevel=2,
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
# Reset internal state
|
|
230
|
+
self._statement = None
|
|
231
|
+
self._statement_handle = None
|
|
232
|
+
self._result_reader = None
|
|
233
|
+
self.rowcount = -1
|
|
234
|
+
self._next_page = None
|
|
235
|
+
|
|
236
|
+
# Now submit the statement ...
|
|
237
|
+
self._statement = self._submit_statement(
|
|
238
|
+
statement_text, parameters, statement_name, statement_label
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
if self._statement.is_failed:
|
|
242
|
+
raise OperationalError(
|
|
243
|
+
f"Statement submission failed: {self._statement.status.get('detail', '')}"
|
|
244
|
+
) # pragma: no cover
|
|
245
|
+
|
|
246
|
+
# ... and wait for it to be "ready" (either in a terminal state or running) based on
|
|
247
|
+
# execution mode and statement type.
|
|
248
|
+
self._wait_for_statement_ready(timeout)
|
|
249
|
+
|
|
250
|
+
def executemany(self, operation: str, seq_of_parameters: list[tuple | list | dict]):
|
|
251
|
+
# Implement this if needed.
|
|
252
|
+
# XXX: We need to handle multiple statements with a single cursor here,
|
|
253
|
+
# the logic currently implies each cursor handles a single statement at a time
|
|
254
|
+
raise NotImplementedError("executemany not implemented")
|
|
255
|
+
|
|
256
|
+
def _raise_if_ddl_mode(self):
|
|
257
|
+
"""Raise if cursor is in a DDL mode that doesn't support result fetching."""
|
|
258
|
+
if self._execution_mode.is_ddl:
|
|
259
|
+
raise InterfaceError(
|
|
260
|
+
f"Cannot fetch results in {self._execution_mode}. "
|
|
261
|
+
"DDL statements do not produce result sets."
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
def _get_result_reader(self) -> ResultReader[Any]:
|
|
265
|
+
"""Raise if result reader is not initialized, which should be the case if the
|
|
266
|
+
statement is not append-only or if we haven't successfully waited for the statement to
|
|
267
|
+
be ready."""
|
|
268
|
+
if self._result_reader is None:
|
|
269
|
+
raise InterfaceError(
|
|
270
|
+
"Result reader not initialized. This likely means the statement"
|
|
271
|
+
" is not ready for fetching results yet."
|
|
272
|
+
) # pragma: no cover
|
|
273
|
+
|
|
274
|
+
return self._result_reader
|
|
275
|
+
|
|
276
|
+
def fetchone(self) -> ResultRow | None:
|
|
277
|
+
"""
|
|
278
|
+
Fetch the next row of a query result set.
|
|
279
|
+
|
|
280
|
+
Behavior depends on execution mode:
|
|
281
|
+
- Snapshot mode with bounded queries: Uses traditional blocking behavior,
|
|
282
|
+
fetching additional pages if needed to return a row.
|
|
283
|
+
- Streaming mode or changelog queries: Non-blocking, makes at most one
|
|
284
|
+
server request and returns None if no data is immediately available.
|
|
285
|
+
|
|
286
|
+
Important for streaming mode:
|
|
287
|
+
When fetchone() returns None, check cursor.may_have_results to determine:
|
|
288
|
+
- If may_have_results is True: No data currently available, but more may
|
|
289
|
+
arrive later. Continue polling.
|
|
290
|
+
- If may_have_results is False: End of results reached. No more data will
|
|
291
|
+
arrive.
|
|
292
|
+
|
|
293
|
+
Example (streaming mode):
|
|
294
|
+
while True:
|
|
295
|
+
row = cursor.fetchone()
|
|
296
|
+
if row is not None:
|
|
297
|
+
process_row(row)
|
|
298
|
+
elif not cursor.may_have_results:
|
|
299
|
+
break # End of results
|
|
300
|
+
else:
|
|
301
|
+
time.sleep(0.1) # Wait before polling again
|
|
302
|
+
|
|
303
|
+
Returns:
|
|
304
|
+
A single row (tuple or dict based on cursor settings) or None if no
|
|
305
|
+
rows are available.
|
|
306
|
+
|
|
307
|
+
Raises:
|
|
308
|
+
InterfaceError: If cursor is closed or in DDL mode.
|
|
309
|
+
"""
|
|
310
|
+
self._raise_if_closed()
|
|
311
|
+
self._raise_if_ddl_mode()
|
|
312
|
+
|
|
313
|
+
return self._get_result_reader().fetchone()
|
|
314
|
+
|
|
315
|
+
def fetchmany(self, size: int | None = None) -> list[ResultRow]:
|
|
316
|
+
"""
|
|
317
|
+
Fetch up to 'size' rows from the query result set.
|
|
318
|
+
|
|
319
|
+
Behavior depends on execution mode:
|
|
320
|
+
- Snapshot mode with bounded queries: Uses traditional blocking behavior,
|
|
321
|
+
fetching multiple pages if needed to return up to 'size' rows.
|
|
322
|
+
- Streaming mode or changelog queries: Non-blocking, makes at most one
|
|
323
|
+
server request and may return fewer rows than requested.
|
|
324
|
+
|
|
325
|
+
Important for streaming mode:
|
|
326
|
+
When fetchmany() returns an empty list, check cursor.may_have_results to
|
|
327
|
+
determine:
|
|
328
|
+
- If may_have_results is True: No data currently available, but more may
|
|
329
|
+
arrive later. Continue polling.
|
|
330
|
+
- If may_have_results is False: End of results reached. No more data will
|
|
331
|
+
arrive.
|
|
332
|
+
|
|
333
|
+
Example (streaming mode):
|
|
334
|
+
while cursor.may_have_results:
|
|
335
|
+
rows = cursor.fetchmany(10)
|
|
336
|
+
if rows:
|
|
337
|
+
for row in rows:
|
|
338
|
+
process_row(row)
|
|
339
|
+
else:
|
|
340
|
+
time.sleep(0.1) # Wait before polling again
|
|
341
|
+
|
|
342
|
+
Args:
|
|
343
|
+
size: Maximum number of rows to return. If None, uses cursor.arraysize.
|
|
344
|
+
|
|
345
|
+
Returns:
|
|
346
|
+
List of 0 to 'size' rows. In snapshot mode, will attempt to return
|
|
347
|
+
exactly 'size' rows if available. In streaming mode, returns whatever
|
|
348
|
+
is immediately available (possibly empty list).
|
|
349
|
+
Rows are returned as tuples or dicts based on cursor settings.
|
|
350
|
+
|
|
351
|
+
Raises:
|
|
352
|
+
InterfaceError: If cursor is closed or in DDL mode.
|
|
353
|
+
"""
|
|
354
|
+
self._raise_if_closed()
|
|
355
|
+
self._raise_if_ddl_mode()
|
|
356
|
+
|
|
357
|
+
return self._get_result_reader().fetchmany(
|
|
358
|
+
size if size is not None else self.arraysize
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
def fetchall(self) -> list[ResultRow]:
|
|
362
|
+
"""
|
|
363
|
+
Fetch all remaining rows of a query result, blocking until complete.
|
|
364
|
+
|
|
365
|
+
This method will fetch all remaining pages from the server and accumulate
|
|
366
|
+
them in memory. Unlike fetchone() and fetchmany(), this method blocks and
|
|
367
|
+
makes multiple server requests as needed to retrieve all results.
|
|
368
|
+
|
|
369
|
+
Warning:
|
|
370
|
+
This downloads the entire remaining result set into memory.
|
|
371
|
+
For large result sets, consider using iteration or fetchmany()
|
|
372
|
+
to process results in batches.
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
A list of all remaining rows (tuples or dicts based on cursor settings).
|
|
376
|
+
Returns an empty list if no rows are available.
|
|
377
|
+
|
|
378
|
+
Raises:
|
|
379
|
+
InterfaceError: If cursor is closed or in DDL mode.
|
|
380
|
+
NotSupportedError: If called on an unbounded streaming statement,
|
|
381
|
+
since fetchall() would never complete.
|
|
382
|
+
"""
|
|
383
|
+
self._raise_if_closed()
|
|
384
|
+
self._raise_if_ddl_mode()
|
|
385
|
+
|
|
386
|
+
return self._get_result_reader().fetchall()
|
|
387
|
+
|
|
388
|
+
def __iter__(self) -> Iterator[ResultRow]:
|
|
389
|
+
"""Return the cursor as an iterator, so that our __next__ can ensure .close() checks."""
|
|
390
|
+
self._raise_if_closed()
|
|
391
|
+
self._raise_if_ddl_mode()
|
|
392
|
+
return self
|
|
393
|
+
|
|
394
|
+
def __next__(self) -> ResultRow:
|
|
395
|
+
"""Defer to the result reader's iterator after proving
|
|
396
|
+
the cursor is not yet closed."""
|
|
397
|
+
self._raise_if_closed()
|
|
398
|
+
return self._get_result_reader().__next__()
|
|
399
|
+
|
|
400
|
+
def close(self) -> None:
|
|
401
|
+
"""
|
|
402
|
+
Close the cursor and free associated resources.
|
|
403
|
+
|
|
404
|
+
This method marks the cursor as closed and releases any
|
|
405
|
+
local resources associated with it.
|
|
406
|
+
|
|
407
|
+
If the statement is in a deletable state, it will also attempt to
|
|
408
|
+
delete the statement from the server to free server-side resources.
|
|
409
|
+
|
|
410
|
+
Active statements (e.g., running streaming queries) will not be deleted.
|
|
411
|
+
"""
|
|
412
|
+
if not self._closed:
|
|
413
|
+
if self._statement is not None and self._statement.is_deletable:
|
|
414
|
+
try:
|
|
415
|
+
# Delete the statement server-side. Our handle on it will then smell
|
|
416
|
+
# `.is_deleted` as true.
|
|
417
|
+
self.delete_statement()
|
|
418
|
+
except Exception as e:
|
|
419
|
+
logger.error(
|
|
420
|
+
f"Error deleting statement {self._statement.name} during cursor close: {e}"
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
self.rowcount = -1
|
|
424
|
+
self._closed = True
|
|
425
|
+
self._result_reader = None
|
|
426
|
+
|
|
427
|
+
def setinputsizes(self, sizes) -> None:
|
|
428
|
+
"""
|
|
429
|
+
Set the sizes of input parameters.
|
|
430
|
+
|
|
431
|
+
This method is a no-op for this implementation, as input sizes
|
|
432
|
+
are not explicitly handled.
|
|
433
|
+
"""
|
|
434
|
+
pass # pragma: no cover
|
|
435
|
+
|
|
436
|
+
def setoutputsize(self, size, column: int | None = None) -> None:
|
|
437
|
+
"""
|
|
438
|
+
Set the size of output columns.
|
|
439
|
+
|
|
440
|
+
This method is a no-op for this implementation, as output sizes
|
|
441
|
+
are not explicitly handled.
|
|
442
|
+
"""
|
|
443
|
+
pass # pragma: no cover
|
|
444
|
+
|
|
445
|
+
def delete_statement(self) -> None:
|
|
446
|
+
"""
|
|
447
|
+
Delete any possible CCloud Flink-side statement to prevent orphaned jobs / statement
|
|
448
|
+
records.
|
|
449
|
+
|
|
450
|
+
If no statement was executed, or if the statement was already deleted, this is a no-op.
|
|
451
|
+
|
|
452
|
+
Raises:
|
|
453
|
+
OperationalError: If statement deletion fails.
|
|
454
|
+
InterfaceError: If the cursor or connection is closed.
|
|
455
|
+
"""
|
|
456
|
+
self._raise_if_closed()
|
|
457
|
+
|
|
458
|
+
if self._statement is None or self._statement.is_deleted:
|
|
459
|
+
return
|
|
460
|
+
|
|
461
|
+
self._connection.delete_statement(self._statement.name)
|
|
462
|
+
self._statement.set_deleted()
|
|
463
|
+
|
|
464
|
+
@property
|
|
465
|
+
def is_closed(self) -> bool:
|
|
466
|
+
"""
|
|
467
|
+
Check if the cursor is closed.
|
|
468
|
+
|
|
469
|
+
Returns:
|
|
470
|
+
True if the cursor is closed, False otherwise
|
|
471
|
+
"""
|
|
472
|
+
return self._closed
|
|
473
|
+
|
|
474
|
+
@property
|
|
475
|
+
def may_have_results(self) -> bool:
|
|
476
|
+
"""
|
|
477
|
+
Check if there may be results available to fetch.
|
|
478
|
+
|
|
479
|
+
This property is essential for streaming mode to distinguish between:
|
|
480
|
+
- Temporary emptiness: fetchone() returns None but may_have_results is True
|
|
481
|
+
(more data might arrive later)
|
|
482
|
+
- Permanent end: fetchone() returns None and may_have_results is False
|
|
483
|
+
(no more data will ever arrive)
|
|
484
|
+
|
|
485
|
+
Returns:
|
|
486
|
+
True if the statement can produce results and more data may be available.
|
|
487
|
+
False if the statement cannot produce results or results are exhausted.
|
|
488
|
+
|
|
489
|
+
Raises:
|
|
490
|
+
InterfaceError: If statement.has_schema() raises (e.g., FAILED statements
|
|
491
|
+
without traits, or before statement is first polled).
|
|
492
|
+
"""
|
|
493
|
+
return (
|
|
494
|
+
self._statement is not None
|
|
495
|
+
and self._statement.has_schema()
|
|
496
|
+
and self._statement.schema is not None
|
|
497
|
+
and self._get_result_reader().may_have_results
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
@property
|
|
501
|
+
def metrics(self) -> FetchMetrics:
|
|
502
|
+
"""Return the current fetch metrics from the result reader, if available."""
|
|
503
|
+
if self._result_reader is None:
|
|
504
|
+
raise InterfaceError(
|
|
505
|
+
"No result reader initialized, cannot get metrics."
|
|
506
|
+
) # pragma: no cover
|
|
507
|
+
return self._result_reader.metrics
|
|
508
|
+
|
|
509
|
+
@property
|
|
510
|
+
def as_dict(self) -> bool:
|
|
511
|
+
"""
|
|
512
|
+
Check if the cursor returns row data as dictionaries.
|
|
513
|
+
|
|
514
|
+
Returns:
|
|
515
|
+
True if rows are returned as dicts, False if as tuples.
|
|
516
|
+
This applies to the row data portion whether in plain rows or
|
|
517
|
+
within ChangeloggedRow namedtuples.
|
|
518
|
+
|
|
519
|
+
Example:
|
|
520
|
+
cursor = conn.cursor(as_dict=True)
|
|
521
|
+
if cursor.as_dict:
|
|
522
|
+
print("Results will have column names as dict keys")
|
|
523
|
+
"""
|
|
524
|
+
return self._results_as_dicts
|
|
525
|
+
|
|
526
|
+
@property
|
|
527
|
+
def execution_mode(self) -> ExecutionMode:
|
|
528
|
+
"""
|
|
529
|
+
Get the execution mode of this cursor.
|
|
530
|
+
|
|
531
|
+
Returns:
|
|
532
|
+
ExecutionMode.SNAPSHOT for bounded queries or
|
|
533
|
+
ExecutionMode.STREAMING_QUERY for continuous/unbounded queries.
|
|
534
|
+
|
|
535
|
+
Example:
|
|
536
|
+
cursor = conn.cursor(mode=ExecutionMode.STREAMING_QUERY)
|
|
537
|
+
if cursor.execution_mode == ExecutionMode.STREAMING_QUERY:
|
|
538
|
+
print("This is a streaming cursor")
|
|
539
|
+
"""
|
|
540
|
+
return self._execution_mode
|
|
541
|
+
|
|
542
|
+
@property
|
|
543
|
+
def is_streaming(self) -> bool:
|
|
544
|
+
"""
|
|
545
|
+
Check if this cursor is in streaming mode.
|
|
546
|
+
|
|
547
|
+
This is a convenience property equivalent to checking:
|
|
548
|
+
`cursor.execution_mode == ExecutionMode.STREAMING_QUERY`
|
|
549
|
+
|
|
550
|
+
Returns:
|
|
551
|
+
True if the cursor is in streaming mode, False if in snapshot mode.
|
|
552
|
+
|
|
553
|
+
Example:
|
|
554
|
+
cursor = conn.streaming_cursor()
|
|
555
|
+
if cursor.is_streaming:
|
|
556
|
+
# Use non-blocking fetch pattern for streaming
|
|
557
|
+
while cursor.may_have_results:
|
|
558
|
+
row = cursor.fetchone()
|
|
559
|
+
if row:
|
|
560
|
+
process(row)
|
|
561
|
+
else:
|
|
562
|
+
time.sleep(0.1)
|
|
563
|
+
else:
|
|
564
|
+
# Use standard blocking fetch for snapshot
|
|
565
|
+
for row in cursor:
|
|
566
|
+
process(row)
|
|
567
|
+
"""
|
|
568
|
+
return self._execution_mode == ExecutionMode.STREAMING_QUERY
|
|
569
|
+
|
|
570
|
+
@property
|
|
571
|
+
def returns_changelog(self) -> bool:
|
|
572
|
+
"""
|
|
573
|
+
Check if the cursor returns changelog events with row data.
|
|
574
|
+
|
|
575
|
+
This property helps determine the result structure:
|
|
576
|
+
- True: Results are ChangeloggedRow namedtuples with (op, row)
|
|
577
|
+
- False: Results are plain rows (tuples or dicts)
|
|
578
|
+
|
|
579
|
+
For streaming non-append-only queries, this will be True,
|
|
580
|
+
meaning each result includes an operation type (INSERT, DELETE,
|
|
581
|
+
UPDATE_BEFORE, UPDATE_AFTER) along with the row data.
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
True if results include changelog operations, False otherwise.
|
|
585
|
+
Returns False if no statement has been executed yet.
|
|
586
|
+
|
|
587
|
+
Example:
|
|
588
|
+
cursor.execute("SELECT user_id, COUNT(*) FROM orders GROUP BY user_id")
|
|
589
|
+
if cursor.returns_changelog:
|
|
590
|
+
row = cursor.fetchone()
|
|
591
|
+
if row:
|
|
592
|
+
op, data = row # ChangeloggedRow unpacks to (operation, row_data)
|
|
593
|
+
print(f"Operation: {op}, Data: {data}")
|
|
594
|
+
else:
|
|
595
|
+
row = cursor.fetchone() # Plain tuple or dict
|
|
596
|
+
"""
|
|
597
|
+
if self._statement is None:
|
|
598
|
+
return False
|
|
599
|
+
# Changelog results occur when:
|
|
600
|
+
# 1. Query is streaming (not snapshot), AND
|
|
601
|
+
# 2. Query is not append-only (has updates/deletes)
|
|
602
|
+
return self.is_streaming and not self._statement.is_append_only
|
|
603
|
+
|
|
604
|
+
def changelog_compressor(self) -> ChangelogCompressor:
|
|
605
|
+
"""Create a changelog compressor for streaming non-append-only results.
|
|
606
|
+
|
|
607
|
+
The compressor accumulates and applies changelog operations to maintain
|
|
608
|
+
a logical result set that changes over time.
|
|
609
|
+
|
|
610
|
+
Returns:
|
|
611
|
+
A ChangelogCompressor instance appropriate for this cursor's configuration.
|
|
612
|
+
|
|
613
|
+
Raises:
|
|
614
|
+
InterfaceError: If the cursor is not configured for changelog results or
|
|
615
|
+
if there is no statement.
|
|
616
|
+
"""
|
|
617
|
+
if not self._statement:
|
|
618
|
+
raise InterfaceError("Cannot create changelog compressor without a statement")
|
|
619
|
+
|
|
620
|
+
return create_changelog_compressor(self, self._statement)
|
|
621
|
+
|
|
622
|
+
def _raise_if_closed(self) -> None:
|
|
623
|
+
"""Raise InterfaceError if the cursor or connection is closed."""
|
|
624
|
+
if self._closed:
|
|
625
|
+
raise InterfaceError("Cursor is closed")
|
|
626
|
+
if self._connection.is_closed:
|
|
627
|
+
raise InterfaceError("Connection is closed")
|
|
628
|
+
|
|
629
|
+
def _wait_for_statement_ready(self, timeout: int) -> None:
|
|
630
|
+
"""
|
|
631
|
+
Wait for self._statement to be ready (not in PENDING status).
|
|
632
|
+
Uses exponential backoff with jitter to prevent thundering herd problems.
|
|
633
|
+
|
|
634
|
+
Reassigns to self._statement with updated status on each poll.
|
|
635
|
+
|
|
636
|
+
Args:
|
|
637
|
+
timeout: Maximum time to wait in seconds
|
|
638
|
+
|
|
639
|
+
Raises:
|
|
640
|
+
OperationalError: If polling times out or fails
|
|
641
|
+
"""
|
|
642
|
+
|
|
643
|
+
if self._statement is None:
|
|
644
|
+
raise InterfaceError(
|
|
645
|
+
"Calling _wait_for_statement_ready but _statement is None, this is probably a bug"
|
|
646
|
+
) # pragma: no cover
|
|
647
|
+
|
|
648
|
+
start_time = time.monotonic()
|
|
649
|
+
base_delay = 1.0 # Start with 1 second
|
|
650
|
+
max_delay = 30.0 # Maximum delay between polls
|
|
651
|
+
current_delay = base_delay
|
|
652
|
+
|
|
653
|
+
while time.monotonic() - start_time < timeout:
|
|
654
|
+
logger.info(f"Checking statement '{self._statement.name}' status...")
|
|
655
|
+
|
|
656
|
+
response = self._connection._get_statement(self._statement.name)
|
|
657
|
+
|
|
658
|
+
self._statement = statement = Statement.from_response(self._connection, response)
|
|
659
|
+
|
|
660
|
+
self._raise_if_statement_is_broken(statement)
|
|
661
|
+
|
|
662
|
+
# If we can fetch results based on execution mode and statement state,
|
|
663
|
+
# create the result reader and exit the polling loop.
|
|
664
|
+
if statement.can_fetch_results(self._execution_mode):
|
|
665
|
+
if statement.is_append_only:
|
|
666
|
+
# Create result reader for append-only statements, will
|
|
667
|
+
# return row tuples or dicts depending on self._results_as_dicts.
|
|
668
|
+
self._result_reader = AppendOnlyResultReader(
|
|
669
|
+
self._connection,
|
|
670
|
+
self._statement,
|
|
671
|
+
self._execution_mode,
|
|
672
|
+
as_dict=self._results_as_dicts,
|
|
673
|
+
)
|
|
674
|
+
else:
|
|
675
|
+
# Use a ChangelogEventReader that will return pairs of the changelog
|
|
676
|
+
# operation and the type-promoted row data as a dict or tuple depending
|
|
677
|
+
# on self._results_as_dicts.
|
|
678
|
+
self._result_reader = ChangelogEventReader(
|
|
679
|
+
self._connection,
|
|
680
|
+
self._statement,
|
|
681
|
+
self._execution_mode,
|
|
682
|
+
as_dict=self._results_as_dicts,
|
|
683
|
+
)
|
|
684
|
+
return
|
|
685
|
+
|
|
686
|
+
# Otherwise, exponential backoff with jitter to prevent thundering herd
|
|
687
|
+
jitter = random.uniform(0.75, 1.25) # ±25% randomness
|
|
688
|
+
actual_delay = current_delay * jitter
|
|
689
|
+
time.sleep(actual_delay)
|
|
690
|
+
current_delay = min(current_delay * 1.5, max_delay)
|
|
691
|
+
|
|
692
|
+
raise OperationalError(f"Statement submission timed out after {timeout} seconds")
|
|
693
|
+
|
|
694
|
+
def _raise_if_statement_is_broken(self, statement: Statement) -> None:
|
|
695
|
+
"""Raise an exception if the statement is in a failed, degraded, or pool-exhausted state.
|
|
696
|
+
|
|
697
|
+
Raises:
|
|
698
|
+
OperationalError: If the statement is failed or degraded.
|
|
699
|
+
ComputePoolExhaustedError: If the statement is pool-exhausted (a subclass of
|
|
700
|
+
OperationalError).
|
|
701
|
+
"""
|
|
702
|
+
|
|
703
|
+
if statement.is_failed:
|
|
704
|
+
raise OperationalError(
|
|
705
|
+
f"Statement '{statement.name}' failed: {statement.status.get('detail', '')}"
|
|
706
|
+
)
|
|
707
|
+
|
|
708
|
+
# If the statement is degraded (unbounded and in a bad state), hmm.
|
|
709
|
+
# For now, treat it as an error.
|
|
710
|
+
if statement.is_degraded:
|
|
711
|
+
raise OperationalError(
|
|
712
|
+
f"Statement '{statement.name}' is in DEGRADED state: {statement.status['detail']}"
|
|
713
|
+
)
|
|
714
|
+
|
|
715
|
+
# If was submitted to an overloaded compute pool, then at this
|
|
716
|
+
# time we choose to both _delete_ the statement and raise a specific
|
|
717
|
+
# exception.
|
|
718
|
+
if statement.is_pool_exhausted:
|
|
719
|
+
statement_deleted = False
|
|
720
|
+
try:
|
|
721
|
+
self.delete_statement()
|
|
722
|
+
statement_deleted = True
|
|
723
|
+
except Exception as e:
|
|
724
|
+
logger.error(f"Error deleting pool-exhausted statement {statement.name}: {e}")
|
|
725
|
+
|
|
726
|
+
# Build message based on whether deletion succeeded
|
|
727
|
+
if statement_deleted:
|
|
728
|
+
deletion_msg = "The statement has been deleted."
|
|
729
|
+
else:
|
|
730
|
+
deletion_msg = "The statement could not be deleted and may need manual cleanup."
|
|
731
|
+
|
|
732
|
+
# Subclass of OperationalError....
|
|
733
|
+
raise ComputePoolExhaustedError(
|
|
734
|
+
f"Statement '{statement.name}' was not accepted for execution due to compute"
|
|
735
|
+
f" pool exhaustion. {deletion_msg} Please retry your query.",
|
|
736
|
+
statement_name=statement.name,
|
|
737
|
+
statement_deleted=statement_deleted,
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
def _submit_statement(
|
|
741
|
+
self,
|
|
742
|
+
statement_text: str,
|
|
743
|
+
parameters: tuple | list | None = None,
|
|
744
|
+
statement_name: str | None = None,
|
|
745
|
+
statement_label: str | None = None,
|
|
746
|
+
) -> Statement:
|
|
747
|
+
"""
|
|
748
|
+
Submit a SQL statement for execution.
|
|
749
|
+
|
|
750
|
+
Args:
|
|
751
|
+
operation: The SQL statement to execute
|
|
752
|
+
parameters: Parameters for the SQL statement (optional)
|
|
753
|
+
statement_name: Optional name for the statement (defaults to DB-API UUID if
|
|
754
|
+
not provided)
|
|
755
|
+
statement_label: Optional label for the statement for easier identification in
|
|
756
|
+
server logs and UIs (defaults to None)
|
|
757
|
+
|
|
758
|
+
Returns:
|
|
759
|
+
The submitted Statement object
|
|
760
|
+
|
|
761
|
+
Raises:
|
|
762
|
+
OperationalError: If statement submission fails
|
|
763
|
+
ProgrammingError: If template parameter interpolation fails
|
|
764
|
+
"""
|
|
765
|
+
logger.info(f"Submitting statement {statement_text}")
|
|
766
|
+
|
|
767
|
+
interpolated_statement = self._interpolate_parameters(statement_text, parameters)
|
|
768
|
+
|
|
769
|
+
logger.debug(f"Interpolated statement: {interpolated_statement}")
|
|
770
|
+
|
|
771
|
+
response = self._connection._execute_statement(
|
|
772
|
+
interpolated_statement,
|
|
773
|
+
self._execution_mode,
|
|
774
|
+
statement_name,
|
|
775
|
+
statement_label,
|
|
776
|
+
)
|
|
777
|
+
return Statement.from_response(self._connection, response)
|
|
778
|
+
|
|
779
|
+
def _interpolate_parameters(
|
|
780
|
+
self,
|
|
781
|
+
statement_template: str,
|
|
782
|
+
parameters: tuple | list | None = None,
|
|
783
|
+
) -> str:
|
|
784
|
+
"""Interpolate parameters (if any) into the statement template, returning
|
|
785
|
+
the final statement.
|
|
786
|
+
|
|
787
|
+
Raises ProgrammingError if wrong number of parameters provided.
|
|
788
|
+
"""
|
|
789
|
+
if parameters is None or len(parameters) == 0:
|
|
790
|
+
return statement_template
|
|
791
|
+
|
|
792
|
+
if not isinstance(parameters, (list, tuple)):
|
|
793
|
+
raise TypeError(f"Parameters must be a tuple or list, got {type(parameters)}")
|
|
794
|
+
|
|
795
|
+
# May raise InterfaceError if unsupported parameter type found
|
|
796
|
+
converted_params = convert_statement_parameters(parameters)
|
|
797
|
+
|
|
798
|
+
# Interpolate parameters using the %s placeholders in statement_template.
|
|
799
|
+
try:
|
|
800
|
+
interpolated_statement = statement_template % converted_params
|
|
801
|
+
except TypeError as e:
|
|
802
|
+
raise ProgrammingError(f"Error interpolating parameters into statement: {e}") from e
|
|
803
|
+
|
|
804
|
+
return interpolated_statement
|