confluent-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1007 @@
1
+ """
2
+ Connection module for Confluent SQL DB-API driver.
3
+
4
+ This module provides the connect function and Connection class for establishing
5
+ connections to Confluent SQL services.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import uuid
12
+ import warnings
13
+ from collections import namedtuple
14
+ from collections.abc import Generator
15
+ from contextlib import contextmanager
16
+ from dataclasses import fields, is_dataclass
17
+ from typing import Any
18
+
19
+ import httpx
20
+
21
+ from .__version__ import VERSION
22
+ from .cursor import Cursor
23
+ from .exceptions import InterfaceError, OperationalError, StatementDeletedError
24
+ from .execution_mode import ExecutionMode
25
+ from .statement import LABEL_PREFIX as STATEMENT_LABEL_PREFIX
26
+ from .statement import ChangelogRow, Statement
27
+ from .types import RowPythonTypes
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
def connect(  # noqa: PLR0913
    flink_api_key: str,
    flink_api_secret: str,
    environment: str,
    compute_pool_id: str,
    organization_id: str,
    cloud_provider: str,
    cloud_region: str,
    api_key: str | None = None,
    api_secret: str | None = None,
    dbname: str | None = None,
    result_page_fetch_pause_millis: int = 100,
    http_user_agent: str | None = None,
) -> Connection:
    """
    Validate the mandatory arguments and build a Connection.

    Args:
        flink_api_key: Flink API key (required, must be non-empty)
        flink_api_secret: Flink API secret (required, must be non-empty)
        environment: Environment ID (required, must be non-empty)
        compute_pool_id: Compute pool ID for SQL execution (required, must be non-empty)
        organization_id: Organization ID (required, must be non-empty)
        cloud_provider: Cloud provider, e.g. "aws", "gcp", "azure" (required)
        cloud_region: Cloud region, e.g. "us-east-2", "us-west-2" (required)
        api_key: Confluent Cloud API key for general Confluent Cloud resources (optional)
        api_secret: Confluent Cloud API secret (optional)
        dbname: The name of the database to use (optional)
        result_page_fetch_pause_millis: Maximum milliseconds to wait between fetches of
            result pages for a single statement. Defaults to 100. Forwarded to the
            Connection as ``statement_results_page_fetch_pause_millis``. The pause exists
            to avoid hammering the statement results API while no rows are available yet;
            if at least this much time has already elapsed since the previous fetch for
            the statement, no delay is applied.
        http_user_agent: User-Agent header value for HTTP requests. When None, the
            Connection falls back to ``Connection.DEFAULT_USER_AGENT``.

    Returns:
        A Connection object representing the database connection

    Raises:
        InterfaceError: If any required argument is empty or missing. The Connection
            constructor may raise it as well (negative pause, invalid user agent).
    """
    # Checked in this exact order so that the first missing argument decides the message.
    required_arguments = (
        (environment, "Environment ID is required"),
        (compute_pool_id, "Compute pool ID is required"),
        (organization_id, "Organization ID is required"),
        (cloud_provider, "Cloud provider is required"),
        (cloud_region, "Cloud region is required"),
        # Both halves of the Flink credential pair must be present.
        (flink_api_key and flink_api_secret, "Flink API key and secret are required"),
    )
    for supplied, complaint in required_arguments:
        if not supplied:
            raise InterfaceError(complaint)

    return Connection(
        flink_api_key,
        flink_api_secret,
        environment,
        compute_pool_id,
        organization_id,
        cloud_provider,
        cloud_region,
        api_key=api_key,
        api_secret=api_secret,
        dbname=dbname,
        statement_results_page_fetch_pause_millis=result_page_fetch_pause_millis,
        http_user_agent=http_user_agent,
    )
114
+
115
+
116
+ class Connection:
117
+ """
118
+ A connection to a Confluent SQL service.
119
+
120
+ This class represents a connection to a Confluent SQL service and provides
121
+ methods for creating cursors and managing the connection lifecycle.
122
+ """
123
+
124
+ DEFAULT_USER_AGENT = (
125
+ f"Confluent-SQL-Dbapi/v{VERSION} (https://confluent.io; support@confluent.io)"
126
+ )
127
+
128
+ environment: str
129
+ organization_id: str
130
+ compute_pool_id: str
131
+ api_key: str | None
132
+ api_secret: str | None
133
+ host: str | None
134
+ statement_results_page_fetch_pause_secs: float
135
+ """Maximum seconds to wait between fetching pages of statement
136
+ results (per statement). Prevents tight loops of requests to the
137
+ statement results API when consuming results for a statement, especially when no results are
138
+ currently available but more may be forthcoming, such as when consuming results from
139
+ a running streaming query, or prior to when the first page of results is ready for
140
+ a snapshot query.
141
+
142
+ If it has already been at least this long since the most recent fetch of results for the
143
+ statement, then no delay will happen.
144
+
145
+ Referenced by the result reader when fetching pages of results for individual
146
+ statements.
147
+ """
148
+
149
+ _closed: bool
150
+ _dbname: str | None
151
+ _client: httpx.Client
152
+ _http_user_agent: str
153
+
154
+ _row_type_registry: RowTypeRegistry
155
+ """Registry for user-defined row types, see register_row_type()."""
156
+
157
+ _snapshot_warning_issued: bool
158
+ """Internal flag to track whether the snapshot query early access warning has been issued.
159
+ Remove after snapshot queries reach open preview (expected May 2026)."""
160
+
161
def __init__(  # noqa: PLR0913
    self,
    flink_api_key: str,
    flink_api_secret: str,
    environment: str,
    compute_pool_id: str,
    organization_id: str,
    cloud_provider: str,
    cloud_region: str,
    api_key: str | None = None,
    api_secret: str | None = None,
    host: str | None = None,
    dbname: str | None = None,
    statement_results_page_fetch_pause_millis: int = 100,
    http_user_agent: str | None = None,
):
    """
    Initialize a new connection to a Confluent SQL service.

    Args:
        flink_api_key: Flink API key, used as the HTTP basic-auth username
        flink_api_secret: Flink API secret, used as the HTTP basic-auth password
        environment: Environment ID
        compute_pool_id: Compute pool ID for SQL execution
        organization_id: Organization ID
        cloud_provider: Cloud provider; only used to derive the default host
        cloud_region: Cloud region (e.g., "us-east-2"); only used to derive the default host
        api_key: Confluent Cloud API key for general Confluent Cloud resources (optional)
        api_secret: Confluent Cloud API secret for general Confluent Cloud resources (optional)
        host: Base URL of the API (optional). Defaults to
            ``https://flink.<cloud_region>.<cloud_provider>.confluent.cloud``.
        dbname: The name of the database to use (optional)
        statement_results_page_fetch_pause_millis: Milliseconds to possibly wait between
            fetches of result pages for a statement. Defaults to 100. Stored in seconds
            as ``statement_results_page_fetch_pause_secs``.
        http_user_agent: User-Agent header for HTTP requests. String, 1-100 chars.
            Defaults to DEFAULT_USER_AGENT when None.

    Raises:
        InterfaceError: If the pause is negative or the user agent fails validation.
    """
    self.environment = environment
    self.compute_pool_id = compute_pool_id
    self.organization_id = organization_id
    self.api_key = api_key
    self.api_secret = api_secret
    self.host = host

    if statement_results_page_fetch_pause_millis < 0:
        raise InterfaceError("result_page_fetch_pause_millis must be non-negative")

    # Kept in seconds; read by the cursor / result reader when paging through results.
    pause_secs = statement_results_page_fetch_pause_millis / 1000.0
    self.statement_results_page_fetch_pause_secs = pause_secs

    # Internal state
    self._closed = False
    self._dbname = dbname

    # Goes through the property setter, which validates the value. The HTTP client
    # does not exist yet at this point, so the setter only stores the string.
    if http_user_agent is None:
        self.http_user_agent = self.DEFAULT_USER_AGENT
    else:
        self.http_user_agent = http_user_agent

    # Derive the regional Flink endpoint when no explicit host was supplied.
    if self.host is None:
        self.host = f"https://flink.{cloud_region}.{cloud_provider}.confluent.cloud"
    base_url = f"{self.host}/sql/v1/organizations/{organization_id}/environments/{environment}"

    # One shared httpx client; every request is relative to the org/environment base URL.
    default_headers = {
        "Content-Type": "application/json",
        "User-Agent": self._http_user_agent,
    }
    self._client = httpx.Client(
        auth=httpx.BasicAuth(username=flink_api_key, password=flink_api_secret),
        base_url=base_url,
        headers=default_headers,
    )

    self._row_type_registry = RowTypeRegistry()

    # TODO: remove after snapshot queries reach open preview (May 2026)
    self._snapshot_warning_issued = False
245
+
246
def close(self) -> None:
    """
    Close the connection and its underlying HTTP client.

    Closing an already-closed connection is a no-op apart from an info-level log entry.
    """
    if self._closed:
        logger.info("Trying to close a closed connection, ignoring")
        return

    # Flip the flag first, then release the HTTP client.
    self._closed = True
    self._client.close()
255
+
256
def cursor(self, *, as_dict: bool = False, mode=ExecutionMode.SNAPSHOT) -> Cursor:
    """
    Create a new cursor for executing statements.

    By default the cursor runs snapshot (bounded) queries that return point-in-time
    results. Snapshot results are bounded and append-only, and are produced once the
    query has consumed all source data as of the query start time.

    Streaming queries return rows as the running query produces them. Whether those
    rows are append-only depends on the query: a plain filter over source tables
    (Kafka topics) is append-only, while aggregations or joins may revise rows that
    were already emitted. Non-append-only results carry a changelog operation per row
    (insert, update, delete) in the 'op' field of the returned ChangeloggedRow
    namedtuple. So mode=ExecutionMode.STREAMING_QUERY always starts a streaming query,
    but whether changelog operations appear depends on the submitted query.

    See the Cursor class documentation for fetch and iteration behavior and for the
    differences between snapshot and streaming queries.

    Return Type Matrix
    ------------------
    1. **Append-only queries + as_dict=False** (default):
       tuples: `("val1", "val2", ...)` -- the standard DB-API row format.

    2. **Append-only queries + as_dict=True**:
       dicts keyed by column name: `{"col1": "val1", "col2": "val2"}`

    3. **Changelog queries + as_dict=False** (streaming, non-append-only):
       `ChangeloggedRow(op=Op.INSERT, row=("v1", "v2"))` -- operation type
       (INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE) plus the row as a tuple.

    4. **Changelog queries + as_dict=True** (streaming, non-append-only):
       `ChangeloggedRow(op=Op.INSERT, row={"col1": "val1"})` -- operation type
       plus the row as a dict.

    Args:
        as_dict: If True, return row data as dictionaries with column names as keys.
            If False (default), return row data as tuples.
        mode: The execution mode for the cursor. Defaults to SNAPSHOT for bounded
            queries. Use STREAMING_QUERY for continuous/unbounded queries.

    Returns:
        A new Cursor object associated with this connection

    Raises:
        InterfaceError: If the connection is closed

    Examples:
        # Snapshot query with tuples
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users")
        assert not cursor.is_streaming
        row = cursor.fetchone()  # ("Alice", 25), or None once there are no more rows

        # Snapshot query with dicts
        cursor = conn.cursor(as_dict=True)
        cursor.execute("SELECT * FROM users")
        row = cursor.fetchone()  # {"name": "Alice", "age": 25}, or None when exhausted

        # Streaming append-only query with tuples
        cursor = conn.cursor(mode=ExecutionMode.STREAMING_QUERY)
        assert cursor.is_streaming
        cursor.execute("SELECT user_id FROM orders")
        assert not cursor.returns_changelog  # only known after execute()
        while cursor.may_have_results:
            row = cursor.fetchone()  # ("Alice",), or None if no data is available *yet*
            if row is not None:
                ...

        # Streaming changelog query
        cursor = conn.cursor(mode=ExecutionMode.STREAMING_QUERY)
        cursor.execute("SELECT user_id, count(*) from orders group by user_id")
        assert cursor.is_streaming
        assert cursor.returns_changelog
        while cursor.may_have_results:
            row = cursor.fetchone()  # may be None if no data is available *yet*
            if row is not None:
                ...  # ChangeloggedRow(op=Op.INSERT, row=("Alice", 25))
    """
    if self._closed:
        raise InterfaceError("Connection is closed")

    # TODO: remove after snapshot queries reach open preview (May 2026)
    # The early-access notice is emitted at most once per connection.
    first_snapshot_cursor = mode.is_snapshot and not self._snapshot_warning_issued
    if first_snapshot_cursor:
        self._snapshot_warning_issued = True
        warnings.warn(
            "Snapshot queries on Confluent Cloud Flink SQL are currently in "
            "Early Access and may be subject to change.",
            stacklevel=2,
        )

    new_cursor = Cursor(self, as_dict=as_dict, execution_mode=mode)
    return new_cursor
368
+
369
def streaming_cursor(self, *, as_dict: bool = False) -> Cursor:
    """
    Create a streaming query cursor.

    This is a convenience method equivalent to:
        `cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)`

    It delegates to cursor() so that the closed-connection check there also applies
    here; a closed connection never hands out a cursor bound to a closed HTTP client.

    For streaming queries, the return type depends on whether the query is append-only:
    - Append-only: Returns tuples or dicts based on as_dict parameter
    - Non-append-only: Returns ChangeloggedRow namedtuples containing operation and row data

    See cursor() method documentation for detailed return type information.

    Args:
        as_dict: If True, return row data as dictionaries. If False, as tuples.

    Returns:
        A new Cursor configured for streaming query execution

    Raises:
        InterfaceError: If the connection is closed
    """
    return self.cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)
389
+
390
@contextmanager
def closing_cursor(
    self, *, as_dict: bool = False, mode: ExecutionMode = ExecutionMode.SNAPSHOT
) -> Generator[Cursor, None, None]:
    """
    Context manager that yields a fresh cursor and closes it on exit.

    The cursor is obtained from cursor(), so the same return type variations apply
    (tuples vs dicts via as_dict, plain rows vs changelog rows depending on the query).
    See cursor() for the details.

    Args:
        as_dict: If True, fetch results as dictionaries, otherwise as tuples
        mode: The execution mode for the cursor. Defaults to SNAPSHOT.

    Yields:
        A new Cursor object associated with this connection

    Raises:
        InterfaceError: If the connection is closed

    Example:
        with conn.closing_cursor(as_dict=True) as cursor:
            cursor.execute("SELECT * FROM users")
            for row in cursor:
                print(row)  # Prints dicts with column names
        # cursor is automatically closed after the with block
    """
    managed = self.cursor(as_dict=as_dict, mode=mode)
    try:
        yield managed
    finally:
        # Runs on normal exit and when the with-body raises.
        managed.close()
423
+
424
@contextmanager
def closing_streaming_cursor(self, *, as_dict: bool = False) -> Generator[Cursor, None, None]:
    """
    Context manager that yields a streaming cursor and closes it on exit.

    Convenience method equivalent to:
        closing_cursor(as_dict=as_dict, mode=ExecutionMode.STREAMING_QUERY)

    Statement Lifecycle Management:
        On exit the cursor is closed via cursor.close().
        NOTE(review): the points below restate the upstream description of
        Cursor.close(), which is not visible from this module -- confirm there.
        - close() makes a best-effort attempt to delete statements that are already
          in a terminal phase (COMPLETED/FAILED/STOPPED); deletion errors are logged
          and suppressed, so server-side cleanup is not strictly guaranteed.
        - Streaming statements that are still RUNNING when the block exits are NOT
          stopped or deleted server-side.

        To explicitly stop a RUNNING streaming statement, call
        cursor.delete_statement() or connection.delete_statement(statement_id)
        before exiting the context manager.

    Args:
        as_dict: If True, fetch results as dictionaries, otherwise as tuples

    Yields:
        A new streaming Cursor object associated with this connection

    Raises:
        InterfaceError: If the connection is closed

    Example:
        with conn.closing_streaming_cursor(as_dict=True) as cursor:
            cursor.execute("SELECT * FROM orders WHERE amount > %s", (1000,))
            while cursor.may_have_results:
                rows = cursor.fetchmany(10)
                if rows:
                    for row in rows:
                        process(row)
                else:
                    time.sleep(0.1)
        # cursor is automatically closed after the with block
    """
    streaming_mode = ExecutionMode.STREAMING_QUERY
    with self.closing_cursor(as_dict=as_dict, mode=streaming_mode) as stream_cur:
        yield stream_cur
472
+
473
def execute_snapshot_ddl(
    self,
    statement_text: str,
    parameters: tuple | list | None = None,
    timeout: int = 3000,
    statement_name: str | None = None,
    statement_label: str | None = None,
) -> Statement:
    """Execute bounded DDL using a temporary SNAPSHOT_DDL cursor.

    Use for statements like:
    - CREATE TABLE (not AS SELECT)
    - DROP TABLE
    - ALTER TABLE
    - CREATE VIEW
    - DROP VIEW
    - CREATE TABLE foo AS SELECT ... (snapshot mode, where the SELECT portion completes
        with snapshot behavior)

    Args:
        statement_text: The DDL statement to execute
        parameters: Optional statement parameters
        timeout: Maximum time to wait for completion; forwarded unchanged to
            Cursor.execute(). Documented upstream as seconds -- TODO confirm the unit
            against Cursor.execute().
        statement_name: Optional name for the statement
        statement_label: Optional label for the statement. Labels can be used to
                         group and manage related statements. The label will be
                         prefixed with "user.confluent.io/" when stored but you only
                         need to provide the label value itself (e.g., "my-ddl-batch")

    Returns:
        The cursor's Statement, read after the temporary cursor has been closed

    Raises:
        OperationalError: If statement fails or times out
        ProgrammingError: If statement is invalid
    """
    with self.closing_cursor(mode=ExecutionMode.SNAPSHOT_DDL) as ddl_cursor:
        ddl_cursor.execute(
            statement_text,
            parameters,
            timeout=timeout,
            statement_name=statement_name,
            statement_label=statement_label,
        )

    # Deliberately read after the with-block: this is the cursor's last view of the statement.
    final_statement = ddl_cursor.statement
    return final_statement
521
+
522
def execute_streaming_ddl(
    self,
    statement_text: str,
    parameters: tuple | list | None = None,
    timeout: int = 3000,
    statement_name: str | None = None,
    statement_label: str | None = None,
) -> Statement:
    """Execute unbounded DDL that starts a streaming job, via a temporary STREAMING_DDL cursor.

    Use for statements like:
    - CREATE TABLE ... AS SELECT ... (streaming mode, where the SELECT portion is unbounded)
    - CREATE MATERIALIZED TABLE ... (streaming mode, where the table is populated by an
                                     unbounded streaming job but the overall CREATE statement
                                     itself completes once the population job is started)

    Args:
        statement_text: The DDL statement to execute
        parameters: Optional statement parameters
        timeout: Maximum time to wait; forwarded unchanged to Cursor.execute().
            Documented upstream as seconds -- TODO confirm the unit against
            Cursor.execute().
        statement_name: Optional name for the statement
        statement_label: Optional label for the statement. Labels can be used to
                         group and manage related statements. The label will be
                         prefixed with "user.confluent.io/" when stored but you only
                         need to provide the label value itself (e.g., "streaming-jobs")

    Returns:
        Statement for any further management of the statement lifecycle, read after
        the temporary cursor has been closed
    """
    with self.closing_cursor(mode=ExecutionMode.STREAMING_DDL) as ddl_cursor:
        ddl_cursor.execute(
            statement_text,
            parameters,
            timeout=timeout,
            statement_name=statement_name,
            statement_label=statement_label,
        )

    # Deliberately read after the with-block, once the cursor has been closed.
    started_statement = ddl_cursor.statement
    return started_statement
561
+
562
def list_statements(self, *, label: str, page_size: int = 100) -> list[Statement]:
    """Return a list of Statement objects for statements with the given label.

    All pages of the server-side listing are fetched and combined, so the result
    contains every statement carrying the label.

    Args:
        label: The label to filter statements by. You can provide either:
            - Just the label value (e.g., "my-batch-job") - the "user.confluent.io/"
              prefix will be added automatically
            - The full label with prefix (e.g., "user.confluent.io/my-batch-job")
        page_size: Number of statements to fetch per API request (default: 100).

    Returns:
        A list of Statement objects that have the specified label. Returns an
        empty list if no statements match the label.

    Raises:
        OperationalError: If the API request fails

    Example:
        # Submit statements with a label
        cursor.execute("SELECT * FROM users", statement_label="daily-report")
        cursor.execute("SELECT * FROM orders", statement_label="daily-report")

        # Later, retrieve all statements with that label
        statements = connection.list_statements(label="daily-report")

        # Delete all statements with the label
        for stmt in statements:
            connection.delete_statement(stmt)
    """
    # The API expects the full label key (prefix included); callers may pass either form.
    if label.startswith(STATEMENT_LABEL_PREFIX):
        label_key = label
    else:
        label_key = f"{STATEMENT_LABEL_PREFIX}{label}"

    # `label_selector` makes the server do the filtering.
    parameters: dict[str, Any] = {
        "label_selector": f"{label_key}=true",
        "page_size": page_size,
    }

    statements: list[Statement] = []
    while True:
        resp_json = self._request("/statements", params=parameters).json()

        # `or` guards against an explicit JSON null as well as a missing key.
        statements_json = resp_json.get("data") or []
        statements.extend(Statement.from_response(self, s) for s in statements_json)

        # metadata.next is a full URL; only its page token is needed for the next request.
        next_link = (resp_json.get("metadata") or {}).get("next")
        next_page_token = self._get_next_page_token(next_link)

        # Stop on any falsy token (None or ""). Continuing with an empty token would
        # leave `page_token` unchanged and re-request the same page forever.
        if not next_page_token:
            break
        parameters["page_token"] = next_page_token

    return statements
627
+
628
def delete_statement(self, statement: str | Statement) -> None:
    """
    Delete a statement by name or Statement object.

    In Flink SQL, executed statements (especially streaming ones) create
    resources that linger on within CCloud until explicitly deleted (or
    have stopped and enough time has passed for automatic cleanup).

    Deleting a RUNNING statement will stop it first.

    Args:
        statement: The name of the statement to delete, or the Statement object. A
            Statement object already marked as deleted is ignored without any request.
            A Statement object for a still running statement is deleted normally,
            which stops and removes it.

    Raises:
        TypeError: If ``statement`` is neither a str nor a Statement
        OperationalError: If the DELETE request fails with anything other than a 404
    """
    given_object = isinstance(statement, Statement)

    if given_object:
        if statement.is_deleted:
            logger.info(f"Statement {statement.name} is already deleted, ignoring")
            return
        statement_name = statement.name
    elif isinstance(statement, str):
        statement_name = statement
    else:
        raise TypeError(
            "Statement to delete must be specified by name or Statement object, "
            f"got {type(statement)}"
        )

    logger.info(f"Deleting statement {statement_name}")
    # Status handling is done here so that a 404 can be treated as success.
    response = self._request(
        f"/statements/{statement_name}", method="DELETE", raise_for_status=False
    )
    try:
        response.raise_for_status()
    except httpx.HTTPStatusError as http_err:
        if http_err.response.status_code != 404:
            raise OperationalError("Error deleting statement") from http_err
        # A 404 means there is nothing left to delete.
        logger.info(f"Statement '{statement_name}' not found while deleting, ignoring")

    if given_object:
        # Keep the caller's Statement reference in sync with the server-side deletion.
        statement.set_deleted()
675
+
676
@property
def is_closed(self) -> bool:
    """
    Whether close() has already been called on this connection.

    Returns:
        True if the connection is closed, False otherwise
    """
    closed_flag = self._closed
    return closed_flag
685
+
686
@property
def http_user_agent(self) -> str:
    """
    The User-Agent header value sent with HTTP requests made by this connection.

    Returns:
        The current User-Agent string
    """
    return self._http_user_agent

@http_user_agent.setter
def http_user_agent(self, value: str) -> None:
    """
    Validate and store the User-Agent header value for this connection.

    If the HTTP client has already been created, its default headers are updated
    too, so the new value applies to subsequent requests.

    Args:
        value: The User-Agent string to use. Must be a string between
               1 and 100 characters in length.

    Raises:
        InterfaceError: If value is not a string, is empty, or exceeds 100 characters

    Example:
        conn.http_user_agent = "my-app/1.0"
    """
    if not isinstance(value, str):
        raise InterfaceError(f"http_user_agent must be a string, got {type(value).__name__}")

    if not 1 <= len(value) <= 100:
        raise InterfaceError(
            f"http_user_agent length must be between 1 and 100 characters, got {len(value)}"
        )

    self._http_user_agent = value

    # During __init__ the setter runs before the client exists; only push the header
    # into the client once it has been created.
    if hasattr(self, "_client"):
        self._client.headers["User-Agent"] = value
727
+
728
def register_row_type(self, class_for_flink_row: type[RowPythonTypes]) -> None:
    """Register a user-defined namedtuple, NamedTuple, or @dataclass class to be used
    to return deserialized ROW values.

    The call is forwarded to this connection's row type registry.

    NOTE(review): the matching rules below restate the upstream description of the
    registry, whose implementation is not visible from this method -- confirm there.
    A registered class is selected for a ROW when the ROW's field names, in order,
    equal the class's declared field names. When no registered class matches, a new
    namedtuple class is created and cached for future use.
    """
    registry = self._row_type_registry
    registry.register_row_type(class_for_flink_row)
741
+
742
def _execute_statement(
    self,
    statement: str,
    execution_mode: ExecutionMode,
    statement_name: str | None = None,
    statement_label: str | None = None,
) -> dict[str, Any]:
    """
    Submit a SQL statement to the statements endpoint and return the decoded response.

    Args:
        statement: The SQL statement to execute
        execution_mode: Execution mode of the submitting cursor. When its
            ``is_snapshot`` flag is set, the statement is submitted with
            ``sql.snapshot.mode`` set to ``"now"``.
        statement_name: Optional name for the statement (defaults to 'dbapi-{uuid}')
        statement_label: Optional label for the statement, with or without the
            mandatory label prefix (defaults to None, meaning no label metadata).

    Returns:
        Dictionary containing the API response

    Raises:
        OperationalError: If statement execution fails
    """
    name = statement_name if statement_name is not None else f"dbapi-{uuid.uuid4()}"

    # A connection is bound to one environment (also called catalog), so it is
    # always sent as the current catalog.
    properties = {"sql.current-catalog": self.environment}
    if self._dbname is not None:
        properties["sql.current-database"] = self._dbname
    if execution_mode.is_snapshot:
        # Ask for snapshot mode behavior -- point-in-time results.
        properties["sql.snapshot.mode"] = "now"

    # Payload layout follows the Flink SQL statements API.
    spec = {
        "statement": statement,
        "properties": properties,
        "compute_pool_id": self.compute_pool_id,
        "stopped": False,
    }
    payload = {
        "name": name,
        "organization_id": self.organization_id,
        "environment_id": self.environment,
        "spec": spec,
    }

    if statement_label is not None:
        # Tolerate callers that already included the mandatory prefix.
        already_prefixed = statement_label.startswith(STATEMENT_LABEL_PREFIX)
        label_key = (
            statement_label if already_prefixed else f"{STATEMENT_LABEL_PREFIX}{statement_label}"
        )
        payload["metadata"] = {
            "labels": {label_key: "true"},
        }

    submit_response = self._request("/statements", method="POST", json=payload)
    return submit_response.json()
807
+
808
def _get_statement(self, statement_name: str) -> dict[str, Any]:
    """
    Fetch the current representation of a statement from the API.

    Args:
        statement_name: The name of the statement to look up

    Returns:
        Dictionary decoded from the JSON body of the statement endpoint

    Raises:
        OperationalError: If status check fails
    """
    statement_path = f"/statements/{statement_name}"
    lookup_response = self._request(statement_path)
    return lookup_response.json()
822
+
823
def _get_statement_results(
    self, statement_name: str, next_url: str | None
) -> tuple[list[ChangelogRow], str | None]:
    """
    Try to get a page of results for a statement.

    Args:
        statement_name: The name of the statement
        next_url: Optional full URL to fetch the next page of results from. If None, then
            the results endpoint for the statement will be used.

    Returns:
        A 2-tuple: (list of results in changelog row format, optional url to fetch next page.)
        If the next page URL is None, there are no more pages to fetch.

    Raises:
        StatementDeletedError: If the statement has been deleted (HTTP 404, a 404 /
            "not found" error payload, or a response without any result data)
        OperationalError: If results retrieval fails for other reasons
    """
    if next_url is None:
        next_url = f"/statements/{statement_name}/results"

    try:
        response = self._request(next_url).json()
    except OperationalError as e:
        # The request helper chains the originating httpx.HTTPStatusError as the cause,
        # so prefer the real status code. Substring-matching "404" on the message alone
        # misfires whenever the error details happen to contain "404" (for example
        # inside a 'dbapi-<uuid>' statement name). The substring check is kept only
        # as a fallback for errors that carry no HTTP cause.
        cause = e.__cause__
        if isinstance(cause, httpx.HTTPStatusError):
            statement_missing = cause.response.status_code == 404
        else:
            statement_missing = "404" in str(e)

        if statement_missing:
            raise StatementDeletedError(
                f"Statement '{statement_name}' has been deleted", statement_name
            ) from e
        raise

    # Check if the response indicates an error (e.g., statement not found)
    # Some APIs return 200 OK with an error payload instead of proper HTTP status codes
    if response is None:
        raise StatementDeletedError(
            f"Statement '{statement_name}' has been deleted", statement_name
        )

    # 'results' may be absent *or* explicitly null; 'or {}' covers both so that a null
    # value falls through to the error handling below instead of an AttributeError.
    data_list = (response.get("results") or {}).get("data")
    if data_list is None:
        # Check if this is an error response indicating the statement was deleted
        error = response.get("error")
        if error:
            # The error payload is not guaranteed to be a mapping.
            error_code = error.get("code") if isinstance(error, dict) else None
            if error_code == 404 or "not found" in str(error).lower():
                raise StatementDeletedError(
                    f"Statement '{statement_name}' has been deleted", statement_name
                )
            raise OperationalError(f"Error fetching results: {error}")
        # If no error but data is None, treat as deleted statement
        raise StatementDeletedError(
            f"Statement '{statement_name}' has been deleted or returned invalid response",
            statement_name,
        )

    # Promote the raw dicts to ChangelogRow namedtuples. 'op' may be omitted, in which
    # case we assume 0 (INSERT). If no (new) results are currently available, this
    # will be an empty list.
    results: list[ChangelogRow] = [ChangelogRow(r.get("op", 0), r["row"]) for r in data_list]

    logger.info("got %d changelog rows for statement %s", len(results), statement_name)

    # An empty or missing 'next' value means there are no further pages.
    next_url = (response.get("metadata") or {}).get("next") or None

    return (results, next_url)
894
+
895
def _request(self, url, method="GET", raise_for_status=True, **kwargs) -> httpx.Response:
    """
    Send an HTTP request through the connection's client.

    Args:
        url: URL (or path) handed to the underlying client
        method: HTTP method name, "GET" unless specified
        raise_for_status: When true, an HTTP error status is turned into an
            OperationalError; when false, the response is returned as-is
        **kwargs: Passed through unchanged to the client's request call

    Returns:
        The httpx response object

    Raises:
        InterfaceError: If the connection has already been closed
        OperationalError: If the response carries an HTTP error status
    """
    if self._closed:
        raise InterfaceError("Connection is closed")

    try:
        reply = self._client.request(method, url, **kwargs)
        logger.debug("Response: %s", reply.content)
        if raise_for_status:
            reply.raise_for_status()
    except httpx.HTTPStatusError as http_err:
        # Best effort: collect the 'detail' text of every entry in the error body.
        # Any problem while decoding or walking the body yields a generic placeholder.
        try:
            error_body = http_err.response.json()
            details = "; ".join(item["detail"] for item in error_body.get("errors", []))
        except Exception:
            details = "no more details"

        raise OperationalError(
            f"error sending request '{http_err.response.status_code}' - {details}"
        ) from http_err
    else:
        return reply
916
+
917
+ def _get_next_page_token(self, next_url: str | None) -> str | None:
918
+ """Extract the next page token from the next_url, if present."""
919
+ if next_url is None:
920
+ return None
921
+
922
+ # The next_url is expected to be a full URL with a query parameter like '?page_token=abc123'
923
+ # We can parse it to extract the page_token value.
924
+ parsed = httpx.URL(next_url)
925
+ page_token = parsed.params.get("page_token")
926
+ return page_token
927
+
928
+
929
class RowTypeRegistry:
    """Lookup table from ROW field-name sequences to the class used to deserialize them.

    Classes supplied by the user (namedtuple, typing.NamedTuple or @dataclass) are
    stored under the ordered tuple of their field names via `register_row_type()`.
    When a ROW value with exactly those field names is deserialized, the registered
    class is used.

    For field-name sequences with no registered class, a namedtuple class is
    generated on first request and kept for later requests.
    """

    _cache: dict[tuple[str, ...], type[RowPythonTypes]]

    def __init__(self):
        # Maps an ordered tuple of field names to the class handling that ROW shape.
        self._cache = {}

    def get_row_class(self, field_names: list[str] | tuple[str, ...]) -> type[RowPythonTypes]:
        """
        Return the class that handles ROWs with the given field names.

        A class stored earlier for this exact sequence of names is returned as-is;
        otherwise a namedtuple class is generated, stored, and returned.

        field_names: A list or tuple of strings (e.g., ['name', 'age'])

        Raises TypeError if field_names is not a list/tuple or holds a non-string.
        """
        if not isinstance(field_names, (list, tuple)):
            raise TypeError(
                f"field_names must be a list or tuple of strings, got {type(field_names)}"
            )

        non_strings = [name for name in field_names if not isinstance(name, str)]
        if non_strings:
            # Report the first offending entry.
            raise TypeError(f"All field names must be strings, got a {type(non_strings[0])}")

        # Lists are unhashable, so the lookup key is always a tuple.
        lookup_key = tuple(field_names)

        known_class = self._cache.get(lookup_key)
        if known_class is not None:
            return known_class

        # rename=True lets namedtuple substitute positional names for field names
        # that are not valid Python identifiers.
        generated = namedtuple("Row", field_names, rename=True)
        logger.debug(
            f"Created new namedtuple class for ROW with fields: {field_names}, "
            f"resulting namedtuple fields: {generated._fields}"  # pyright: ignore[reportAttributeAccessIssue]
        )
        self._cache[lookup_key] = generated
        return generated

    @staticmethod
    def _field_names_of(candidate) -> tuple[str, ...] | None:
        """Return the ordered field names of a supported row class, else None."""
        if not isinstance(candidate, type):
            # Instances (or anything that is not a class) are not supported.
            return None

        # Duck-typed namedtuple / typing.NamedTuple: a tuple subclass exposing _fields.
        if issubclass(candidate, tuple) and hasattr(candidate, "_fields"):
            return tuple(candidate._fields)  # pyright: ignore[reportAttributeAccessIssue]

        # The only other supported kind is an @dataclass.
        if is_dataclass(candidate):
            return tuple(member.name for member in fields(candidate))

        return None

    def register_row_type(self, user_type_for_row: type[RowPythonTypes]) -> None:
        """
        Store a user-provided namedtuple, typing.NamedTuple, or @dataclass class under
        the sequence of its field names, for use when deserializing ROW values.

        Raises TypeError if the provided type is not a supported class type.
        """
        lookup_key = self._field_names_of(user_type_for_row)
        if lookup_key is None:
            # User passed a non-supported type or an instance of something.
            raise TypeError(
                f"Expected a namedtuple, NamedTuple, or @dataclass type, got {user_type_for_row} instead"  # noqa: E501
            )

        # The user's class replaces whatever was stored for this structure before.
        self._cache[lookup_key] = user_type_for_row