confluent-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,566 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from collections.abc import Iterator
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import TYPE_CHECKING
8
+
9
+ from .exceptions import InterfaceError, OperationalError
10
+ from .execution_mode import ExecutionMode
11
+ from .types import ColumnTypeDefinition, FromResponseTypes, StatementTypeConverter, StrAnyDict
12
+
13
+ if TYPE_CHECKING:
14
+ from .connection import Connection
15
+
16
logger = logging.getLogger(__name__)


LABEL_PREFIX = "user.confluent.io/"
"""Prefix carried by every end-user label key in statement metadata.

Callers filtering statements by label may pass only the end-user part of the
label; the driver prepends this prefix before talking to the API."""
23
+
24
+
25
class Op(Enum):
    """Changelog row kinds for Flink SQL result streams.

    The members mirror Apache Flink's ``RowKind`` enum and say what kind of change
    a changelog row represents. They are needed when consuming non-append-only
    streaming queries, whose results can contain updates and deletions.

    Reference:
    https://nightlies.apache.org/flink/flink-docs-stable/api/java/org/apache/flink/types/RowKind.html
    """

    INSERT = 0
    """Insertion: a new row is added to the result set.

    String representation: +I
    """

    UPDATE_BEFORE = 1
    """Update, carrying the previous content of the updated row.

    SHOULD be paired with UPDATE_AFTER when an update has to retract the old row
    first; this is the "before" image of the row.
    String representation: -U
    """

    UPDATE_AFTER = 2
    """Update, carrying the new content of the updated row.

    CAN be paired with UPDATE_BEFORE when an update has to retract the old row
    first; this is the "after" image of the row.
    String representation: +U
    """

    DELETE = 3
    """Deletion: a row is removed from the result set.

    String representation: -D
    """

    def __str__(self):
        """Return the short changelog marker (``+I``, ``-U``, ``+U`` or ``-D``)."""
        # Built per call on purpose: a mapping bound in the Enum body would itself
        # be turned into an enum member.
        short_forms = {
            Op.INSERT: "+I",
            Op.UPDATE_BEFORE: "-U",
            Op.UPDATE_AFTER: "+U",
            Op.DELETE: "-D",
        }
        short_form = short_forms.get(self)
        if short_form is None:
            raise ValueError(
                f"Unknown value for Op: '{self.value}'. This is probably a bug"
            )  # pragma: no cover
        return short_form
81
+
82
+
83
class ChangelogRow:
    """One row of a changelog stream: the changelog operation type together with
    the row values as decoded from the API's JSON response."""

    __slots__ = ("op", "row")

    op: Op
    """The changelog operation type."""
    row: list[FromResponseTypes]
    """The row data as a list of from-response-api-json values."""

    def __init__(self, op: int, row: list[FromResponseTypes]):
        """Create a changelog row.

        Args:
            op: Numeric operation code; converted to an ``Op`` member.
            row: The row values as decoded from the API response.

        Raises:
            ValueError: If ``op`` is not a valid ``Op`` value.
        """
        self.op = Op(op)
        self.row = row

    def __repr__(self) -> str:
        """Return a readable representation for logs and debuggers.

        Without this, a ``__slots__`` class falls back to the opaque default
        ``<ChangelogRow object at 0x...>``.
        """
        return f"{type(self).__name__}(op={self.op!r}, row={self.row!r})"
97
+
98
+
99
class Phase(Enum):
    """Statement execution phases, with a helper for detecting terminal ones."""

    PENDING = "PENDING"
    RUNNING = "RUNNING"
    COMPLETED = "COMPLETED"
    DELETING = "DELETING"
    FAILED = "FAILED"
    # Absent from the REST API docs, but mentioned here:
    # https://docs.confluent.io/cloud/current/flink/concepts/statements.html#flink-sql-statements
    DEGRADED = "DEGRADED"
    STOPPING = "STOPPING"
    STOPPED = "STOPPED"

    # Driver-internal only; the API never returns this phase.
    DELETED = "DELETED"

    def __init__(self, value: str) -> None:
        """Store the raw phase string as this member's value."""
        self._value_ = value

    @property
    def is_terminal(self) -> bool:
        """Whether statement execution has ended in this phase.

        A terminal phase is one in which the statement is no longer executing and
        will not move on to any other phase.

        Returns:
            True for COMPLETED, STOPPED, FAILED and DELETED; False for every other phase.
        """
        terminal_values = Phase._TERMINAL_PHASES  # type: ignore[attr-defined]
        return self.value in terminal_values


# Raw values of the terminal phases, attached after the class statement: a name
# bound inside the Enum body would be turned into an enum member.
Phase._TERMINAL_PHASES = frozenset({"FAILED", "DELETED", "COMPLETED", "STOPPED"})  # type: ignore[attr-defined]
137
+
138
+
139
@dataclass
class Statement:
    """A Confluent SQL statement as reported by the statements API.

    Holds the raw ``metadata``, ``spec`` and ``status`` sections of the API response
    together with the parsed traits (schema, SQL kind, ...) and a little
    driver-side state.
    """

    # SQL kinds treated as pure DDL: they create or modify schema objects.
    _PURE_DDL_KINDS = frozenset(
        {"ALTER_TABLE", "CREATE_TABLE", "CREATE_VIEW", "DROP_TABLE", "DROP_VIEW"}
    )

    # SQL kinds treated as impure DDL: no result set, but the statement may stream.
    _IMPURE_DDL_KINDS = frozenset({"CREATE_TABLE_AS"})

    # Supplied by the cursor that created this statement.
    connection: Connection

    # Taken from the API response.
    statement_id: str
    name: str
    spec: StrAnyDict
    status: StrAnyDict
    metadata: StrAnyDict

    # Parsed from status.traits; None when the response carried no traits.
    traits: Traits | None

    # Driver-side state.
    _phase: Phase
    _deleted: bool = False
    _type_converter: StatementTypeConverter | None = None
    """Lazily created StatementTypeConverter for this statement's schema."""

    @classmethod
    def from_response(cls, connection: Connection, response: StrAnyDict) -> Statement:
        """Build a Statement from a JSON response of the statements API.

        Args:
            connection: The connection the statement belongs to.
            response: The decoded JSON object describing one statement.

        Raises:
            OperationalError: If a mandatory key is missing, the phase is unknown,
                or a statement that did not fail arrives without traits.
        """
        try:
            # Mandatory sections; a missing key is reported by the handler below.
            metadata = response["metadata"]
            statement_id = metadata["uid"]
            name = response["name"]
            spec = response["spec"]
            status = response["status"]

            # The phase is validated before anything else is parsed.
            try:
                phase = Phase(status["phase"])
            except ValueError as err:
                raise OperationalError(
                    f"Received an unknown phase for statement from the server: {status['phase']}. "
                    "This is probably a bug"
                ) from err

            # Traits include the statement schema. They are not present if the
            # statement failed.
            raw_traits = status.get("traits")
            traits = None if raw_traits is None else Traits.from_response(raw_traits)

            # Defensive check: every statement that did not fail should carry traits.
            if phase != Phase.FAILED and traits is None:
                raise OperationalError(
                    f"Received statement '{name}' in phase {phase} without traits. "
                    "This is unexpected and likely indicates a server API change or bug."
                )
        except KeyError as e:
            raise OperationalError(f"Error parsing statement response, missing {e}.") from e

        return cls(connection, statement_id, name, spec, status, metadata, traits, phase)

    def _possible_traits(self) -> Traits:
        """Return the traits, raising InterfaceError when there are none."""
        if self.traits is None:
            raise InterfaceError("Statement traits are not available -- failed statement?")
        return self.traits

    @property
    def phase(self) -> Phase:
        """Current phase; Phase.DELETED once the statement was marked deleted."""
        return Phase.DELETED if self._deleted else self._phase

    @property
    def is_deleted(self) -> bool:
        """Has this statement been explicitly deleted?"""
        return self._deleted

    def set_deleted(self):
        """Mark this statement as deleted."""
        self._deleted = True

    @property
    def is_failed(self) -> bool:
        """Did the statement fail?"""
        return self.phase == Phase.FAILED

    @property
    def is_running(self) -> bool:
        """Is the statement in the RUNNING phase?"""
        return self.phase == Phase.RUNNING

    @property
    def is_degraded(self) -> bool:
        """Is the statement in the DEGRADED phase?"""
        return self.phase is Phase.DEGRADED

    @property
    def is_deletable(self) -> bool:
        """Check if the statement can be deleted safely (COMPLETED, FAILED or STOPPED)."""
        return self.phase in (Phase.COMPLETED, Phase.FAILED, Phase.STOPPED)

    @property
    def scaling_status(self) -> StrAnyDict:
        """The ``scaling_status`` section of the status, or an empty dict if absent."""
        found: StrAnyDict | None = self.status.get("scaling_status")
        return {} if found is None else found

    @property
    def is_pool_exhausted(self) -> bool:
        """Is the statement pending and waiting for compute resources because the
        compute pool is exhausted?"""
        if self.phase is not Phase.PENDING:
            return False
        return self.scaling_status.get("scaling_state") == "POOL_EXHAUSTED"

    @property
    def compute_pool_id(self) -> str:
        """The ``compute_pool_id`` entry of the statement spec."""
        return self.spec["compute_pool_id"]

    @property
    def principal(self) -> str:
        """The ``principal`` entry of the statement spec."""
        return self.spec["principal"]

    @property
    def sql_kind(self) -> str:
        """The SQL kind reported in the traits.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        return self._possible_traits().sql_kind

    @property
    def is_bounded(self) -> bool | None:
        """Whether the statement has a finite result set.

        Bounded statements come either from snapshot queries (every statement
        submitted in snapshot execution mode is bounded) or from streaming queries
        with a defined end, perhaps one selecting from a VALUES clause.

        As of Jan 2026, streaming mode CREATE TABLE AS SELECT (CTAS) statements are
        wrongly reported as bounded, so use this property with caution and weigh
        other factors such as the current phase (such statements should never reach
        a terminal state on their own). Tracked as Jira FSE-1021.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        return self._possible_traits().is_bounded

    @property
    def is_append_only(self) -> bool:
        """Will this statement's results changelog only have insert/append rows?

        Raises:
            InterfaceError: If the statement has no traits.
        """
        return self._possible_traits().is_append_only

    @property
    def is_pure_ddl(self) -> bool:
        """Check if this is a pure DDL statement that creates or modifies schema objects.

        Pure DDL statements must complete fully before the created/modified objects
        are ready for use, unlike streaming queries or CTAS, which are ready when
        RUNNING.

        Returns:
            True if the SQL kind is one of CREATE_TABLE, DROP_TABLE, CREATE_VIEW,
            DROP_VIEW, ALTER_TABLE. False otherwise.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        return self.sql_kind in self._PURE_DDL_KINDS

    @property
    def is_ddl(self) -> bool:
        """Check if this statement is any form of DDL (pure or impure).

        Covers pure DDL (must complete fully) as well as impure DDL such as CTAS
        (can stream but produces no result set). Use is_pure_ddl to check completion
        requirements.

        Returns:
            True if the statement is pure DDL or impure DDL, False otherwise.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        # Dereference once so the traits lookup happens a single time.
        statement_kind = self.sql_kind
        if statement_kind in self._PURE_DDL_KINDS:
            return True
        return statement_kind in self._IMPURE_DDL_KINDS

    def can_fetch_results(self, execution_mode: ExecutionMode) -> bool:
        """Check if results can be fetched from this statement in the given mode.

        Gathers the readiness rules, which depend both on the statement's own
        characteristics and on the execution mode it was submitted in.

        Args:
            execution_mode: The execution mode (snapshot or streaming) the statement
                was submitted in.

        Returns:
            True if results can be fetched, False otherwise.
        """
        # Terminal phases (COMPLETED, FAILED, STOPPED, DELETED) are always ready.
        if self.phase.is_terminal:
            return True

        # Snapshot mode: only a terminal phase counts, and that was handled above.
        if not execution_mode.is_streaming:
            return False

        # Streaming mode, pure DDL: must complete fully before the created or
        # modified objects are usable, and the statement is not terminal here.
        if self.is_pure_ddl:
            return False

        # Streaming mode, bounded non-append-only queries (e.g. aggregations without
        # streaming input): results are available only after full completion.
        if self.is_bounded and not self.is_append_only:
            return False

        # Unbounded streaming queries and append-only bounded queries are ready
        # as soon as they are RUNNING.
        return self.phase == Phase.RUNNING

    @property
    def schema(self) -> Schema | None:
        """Get the result schema carried by this statement's traits.

        The schema describes the columns and their types in the result set. Use
        has_schema() to check whether this statement can produce a result set.

        Returns:
            The Schema object, or None when the traits carry no schema (expected
            for DDL statements, which have no result set).

        Raises:
            InterfaceError: If the statement has no traits at all, for example a
                FAILED statement, for which no traits are sent.
        """
        return self._possible_traits().schema

    def has_schema(self) -> bool:
        """Check if this statement can have a result schema.

        A statement can have a schema if it is not a DDL statement. Query statements
        (SELECT, INSERT, etc.) produce result sets with schemas, while DDL statements
        (including CTAS) do not.

        This is not the same as checking whether the schema is currently populated.
        It distinguishes between:
        - **Legitimate None schema**: DDL statements (will never have a schema)
        - **Unexpected None schema**: query statements (should have a schema)

        Returns:
            True if this statement can produce a schema (non-DDL query), False if it's DDL.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        return not self.is_ddl

    @property
    def description(self) -> list[tuple] | None:
        """Column descriptions in the shape required of a PEP 249 cursor.

        See https://peps.python.org/pep-0249/#description. Each entry is a 7-item
        tuple: (name, type_code, display_size, internal_size, precision, scale,
        null_ok). display_size and internal_size are always None.

        Returns:
            One tuple per schema column, or None when the statement has no schema.

        Raises:
            InterfaceError: If the statement has no traits.
        """
        schema = self.schema
        if schema is None:
            return None
        return [
            (
                col.name,
                col.type.type,
                None,  # display_size ???
                None,  # internal_size ???
                col.type.precision,
                col.type.scale,
                col.type.nullable,
            )
            for col in schema
        ]

    @property
    def type_converter(self) -> StatementTypeConverter:
        """Get or create the StatementTypeConverter for this statement's schema.

        The converter turns JSON-from-API row values into Python values based on
        the statement's schema, for all columns in the result set. It is created on
        first access and reused afterwards.

        Raises:
            InterfaceError: If the statement has no schema (or no traits).
        """
        schema = self.schema
        if schema is None:
            raise InterfaceError("Cannot get type converter for statement with no schema.")

        converter = self._type_converter
        if converter is None:
            converter = StatementTypeConverter(self.connection, schema)
            self._type_converter = converter
        return converter

    @property
    def end_user_labels(self) -> list[str]:
        """Return the end-user labels of this statement.

        End-user labels are the labels provided by the user at statement submission
        time. They are stored in the statement metadata with a "user.confluent.io/"
        prefix, which is stripped off in the returned list. Label keys without that
        prefix are skipped.

        Returns:
            The end-user labels without their prefix; an empty list if there are none.

        Raises:
            InterfaceError: If the metadata has no ``labels`` entry.
        """
        labels = self.metadata.get("labels")
        if labels is None:
            raise InterfaceError("Statement metadata labels are not available.")

        # Labels are modeled as a dict/object in the API even though they are
        # currently used as a set of strings (the values are always "true"), so
        # only the keys matter here.
        return [key.removeprefix(LABEL_PREFIX) for key in labels if key.startswith(LABEL_PREFIX)]
471
+
472
+
473
+ @dataclass(kw_only=True)
474
+ class Column:
475
+ """Describes a column in a statement's result set.
476
+
477
+ Each column represents a projected expression or field in the SELECT clause, including
478
+ its name, data type, and optional description (from column comments).
479
+
480
+ Columns are part of the schema, which is part of the statement's traits. See
481
+ Statement.schema for notes on when schema information becomes available.
482
+ """
483
+
484
+ name: str
485
+ """The column name (may be a user-provided alias or auto-generated expression label)."""
486
+ type: ColumnTypeDefinition
487
+ """The data type of the column (e.g., INTEGER, VARCHAR, ARRAY<STRING>, ROW(...), etc.)."""
488
+ description: str | None = None
489
+ """Optional description or comment for this column."""
490
+
491
+ @classmethod
492
+ def from_response(cls, data: StrAnyDict) -> Column:
493
+ column_type = ColumnTypeDefinition.from_response(data["type"])
494
+ return cls(name=data["name"], type=column_type, description=data.get("description"))
495
+
496
+
497
+ @dataclass(kw_only=True)
498
+ class Schema:
499
+ """Schema describing the columns and types of a statement's result set.
500
+
501
+ Used for type conversion and formatting rows as dicts. Present in Traits.schema
502
+ for query statements; always None for DDL statements.
503
+ """
504
+
505
+ columns: list[Column]
506
+ """The columns in the schema."""
507
+
508
+ @classmethod
509
+ def from_response(cls, data: StrAnyDict) -> Schema:
510
+ columns = [Column.from_response(col) for col in data.get("columns", [])]
511
+ return cls(columns=columns)
512
+
513
+ def __iter__(self) -> Iterator[Column]:
514
+ """Iterate over the columns in the schema."""
515
+ return iter(self.columns)
516
+
517
+
518
+ @dataclass(kw_only=True)
519
+ class Traits:
520
+ """Parsed statement traits from server response.
521
+
522
+ Traits contain metadata about a statement including its schema, SQL kind, and
523
+ whether it is append-only or bounded. They are populated from status.traits
524
+ in the server response.
525
+
526
+ **Availability:**
527
+ Traits are None until the statement is polled from the server for the first time.
528
+ The server does NOT send traits for FAILED statements. Once available, traits
529
+ persist for the statement's lifetime.
530
+
531
+ See Statement.schema property for schema availability notes.
532
+ """
533
+
534
+ connection_refs: list[str] | None
535
+ """The names of connections that the SQL statement references (e.g., in FROM clauses)."""
536
+ is_append_only: bool
537
+ """Indicates the special case where results of a statement are insert/append only
538
+ (indicating simple changelog parsing. May be either a streaming or batch/snapshot query.)."""
539
+ is_bounded: bool
540
+ """Does the result set have a bounded number of rows (aka not a streaming result?
541
+ Implies is_append_only.)"""
542
+ schema: Schema | None
543
+ """The schema of the result set, describing columns and their types.
544
+
545
+ None for: DDL statements (no result set), FAILED statements (traits not sent),
546
+ or before first server poll. Schema is guaranteed available for query statements
547
+ after the first poll.
548
+
549
+ See Statement.schema property for usage notes.
550
+ """
551
+ sql_kind: str
552
+ upsert_columns: list[int] | None
553
+ """Zero-based indices of upsert columns, if any."""
554
+
555
+ @classmethod
556
+ def from_response(cls, data: StrAnyDict) -> Traits:
557
+ schema_data = data.get("schema")
558
+ schema = Schema.from_response(schema_data) if schema_data else None
559
+ return cls(
560
+ connection_refs=data.get("connection_refs"),
561
+ is_append_only=data["is_append_only"],
562
+ is_bounded=data["is_bounded"],
563
+ schema=schema,
564
+ sql_kind=data["sql_kind"],
565
+ upsert_columns=data.get("upsert_columns"),
566
+ )