confluent-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,663 @@
1
+ """
2
+ Result reading and buffering for Confluent SQL DB-API driver.
3
+
4
+ This module provides low-level result readers that fetch statement results
5
+ from the server, handle paging, and convert row data from JSON to Python types.
6
+
7
+ - AppendOnlyResultReader: For append-only streaming queries and all snapshot-mode
8
+ queries (returns row data only).
9
+ - ChangelogEventReader: For non-append-only queries -- a subset of streaming queries
10
+ (returns ChangeloggedRow with operation + row).
11
+
12
+ These readers fetch and expose either result rows or changelog events including
13
+ result rows but do NOT apply or interpret them. These implementations back
14
+ the iteration, fetchone/fetchmany/fetchall methods of our Cursor class.
15
+
16
+ For stateful compression of changelog events into a logical result set, see
17
+ the `changelog_compressor` module, which makes use of the sequence of ChangeloggedRow
18
+ returned by ChangelogEventReader to produce a compressed result set ("interpreted changelog")
19
+ that applies the changelog operations to maintain the current state of each row in the result
20
+ and the logical result set at large over time. That functionality is exposed from the Cursor
21
+ class's `changelog_compressor()` method (only callable for non-append-only streaming statements).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import abc
27
+ import logging
28
+ import time
29
+ from collections import deque
30
+ from dataclasses import dataclass
31
+ from itertools import islice
32
+ from typing import TYPE_CHECKING, Generic, NamedTuple, TypeAlias, TypeVar
33
+
34
+ from .exceptions import InterfaceError, NotSupportedError
35
+ from .execution_mode import ExecutionMode
36
+ from .statement import Op, Schema, Statement
37
+ from .types import StrAnyDict, SupportedPythonTypes
38
+
39
+ if TYPE_CHECKING:
40
+ from .connection import Connection
41
+
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Generic parameter of ResultReader: the element type a concrete reader buffers and returns.
ReaderOutput = TypeVar("ReaderOutput")
"""Type that a result reader produces as output from its __iter__ method."""


# Variable-length tuple holding the converted column values of one result row.
StatementResultTuple: TypeAlias = tuple[SupportedPythonTypes, ...]
"""The tuple representation of a row of Flink statement results
after type conversion from Results API JSON to Python types."""


# A single row in either of the two shapes a RowFormatter can produce.
ResultTupleOrDict: TypeAlias = StatementResultTuple | StrAnyDict
"""Output type for AppendOnlyResultReader fetch methods and iteration."""
55
+
56
+
57
@dataclass()
class FetchMetrics:
    """Counters and timings describing a ResultReader's result-page fetch activity."""

    total_page_fetches: int = 0
    """How many result pages have been fetched from the server so far."""

    total_changelog_rows_fetched: int = 0
    """Running sum of changelog rows received over every fetched page."""

    empty_page_fetches: int = 0
    """How many of the fetched pages came back without any rows."""

    fetch_request_secs: float = 0.0
    """Cumulative seconds spent inside fetch operations (pause time is not included)."""

    paused_times: int = 0
    """How many times a pause was recorded ahead of a page fetch."""

    paused_secs: float = 0.0
    """Cumulative seconds recorded for those pre-fetch pauses."""

    _before_fetch_timestamp: float | None = None
    """Monotonic clock reading taken by prep_for_fetch(); None while no fetch is being timed."""

    @property
    def avg_rows_per_page(self) -> float:
        """Mean number of changelog rows per fetched page (0.0 before any page was fetched)."""
        pages = self.total_page_fetches
        return self.total_changelog_rows_fetched / pages if pages else 0.0

    def paused_before_fetch(self, pause_secs: float) -> None:
        """Account for one pause of `pause_secs` seconds taken before a fetch.

        Args:
            pause_secs: Length of the pause, in seconds.
        """
        self.paused_secs += pause_secs
        self.paused_times += 1

    def prep_for_fetch(self) -> None:
        """Mark the start of a fetch operation by sampling the monotonic clock."""
        self._before_fetch_timestamp = time.monotonic()

    def record_fetch_completion(self, rows_fetched: int) -> None:
        """Fold a just-finished fetch operation into the metrics.

        Must be paired with a preceding prep_for_fetch() call.

        Args:
            rows_fetched: The number of changelog rows the finished fetch returned.

        Raises:
            InterfaceError: If prep_for_fetch() was not called beforehand.
        """
        started_at = self._before_fetch_timestamp
        if started_at is None:
            raise InterfaceError(
                "prep_for_fetch() must be called before recording fetch completion."
            )  # pragma: no cover

        self.fetch_request_secs += time.monotonic() - started_at
        # Clear the start marker so an unpaired completion is detected next time.
        self._before_fetch_timestamp = None

        self.total_page_fetches += 1
        self.total_changelog_rows_fetched += rows_fetched
        if not rows_fetched:
            self.empty_page_fetches += 1
115
+
116
+
117
class RowFormatter(abc.ABC):
    """Strategy interface turning a decoded result tuple into the shape the cursor asked for.

    The tuple-vs-dict decision (driven by the cursor's `as_dict` setting) is made once,
    in the result reader layer, so that anything consuming reader output -- compressors
    included -- only ever sees rows that are already in their final format.
    """

    @abc.abstractmethod
    def format(self, row: StatementResultTuple) -> ResultTupleOrDict:
        """Produce the formatted representation of one result row.

        Args:
            row: Tuple of column values for a single result row.

        Returns:
            The row in whatever format the concrete formatter implements.
        """

    @staticmethod
    def create(as_dict: bool, schema: Schema | None) -> RowFormatter:
        """Build the formatter matching the requested row format.

        Args:
            as_dict: True selects dict rows; False selects tuple rows.
            schema: Statement schema supplying column names; only needed for dict rows.

        Returns:
            A DictRowFormatter when `as_dict` is true, otherwise a TupleRowFormatter.

        Raises:
            InterfaceError: If dict rows were requested but `schema` is missing/falsy.
        """
        if not as_dict:
            return TupleRowFormatter()
        if schema:
            return DictRowFormatter(schema)
        raise InterfaceError("Schema required to format rows as dicts")
155
+
156
+
157
class TupleRowFormatter(RowFormatter):
    """Identity formatter: rows stay in their tuple form."""

    def format(self, row: StatementResultTuple) -> StatementResultTuple:
        """Hand the tuple row back untouched.

        Args:
            row: Tuple of column values for a single result row.

        Returns:
            The very same `row` object that was passed in.
        """
        return row
170
+
171
+
172
class DictRowFormatter(RowFormatter):
    """Formatter producing `{column name: value}` dictionaries from result tuples."""

    def __init__(self, schema: Schema):
        """Capture the column names, in schema order, for later row formatting.

        Args:
            schema: Statement schema whose `columns` each expose a `name`.
        """
        self._column_names = [column.name for column in schema.columns]

    def format(self, row: StatementResultTuple) -> StrAnyDict:
        """Pair each value of `row` with its column name.

        Args:
            row: Tuple of column values for a single result row.

        Returns:
            A dictionary keyed by column name.

        Raises:
            ValueError: If `row` does not have exactly one value per column
                (a consequence of the strict zip).
        """
        names = self._column_names
        return {name: value for name, value in zip(names, row, strict=True)}
193
+
194
+
195
class ResultReader(Generic[ReaderOutput], abc.ABC):
    """Abstract base class for result readers.

    Important: Iteration vs Fetch Methods Behavior
    ------------------------------------------------
    This reader provides two ways to consume results, with different
    blocking behaviors in streaming mode:

    1. **Iteration (for row in reader):**
       - Always blocking in both snapshot and streaming modes
       - Waits for data to become available or until definitively complete
       - Suitable for consuming complete result sets
       - Will retry fetching pages until data arrives

    2. **Fetch methods (fetchone/fetchmany):**
       - Blocking in snapshot mode (traditional DB-API behavior)
       - Non-blocking in streaming mode (at most one server request)
       - Suitable for polling patterns in streaming applications
       - Use may_have_results to distinguish temporary vs permanent emptiness

    Recommendations:
    - For snapshot queries: Use either approach (both block until complete)
    - For streaming queries: Use fetch methods for polling, iteration for continuous consumption

    Subclasses implement `_retain()` to decide what gets appended to the buffer for each
    fetched changelog row.
    """

    _connection: Connection
    """The connection associated with this result reader."""

    _statement: Statement
    """The statement associated with this result reader."""

    _fetch_next_page_called: bool
    """Whether _fetch_next_page has been called at least once.
    TODO: discard in favor of _most_recent_results_fetch_time
    """

    _next_page: str | None
    """The URL of the next page of results, if any.

    Initial state is None, but may also be set to a falsy value after fetching a page if
    there are no more pages to fetch (distinguished from the initial state by
    `_fetch_next_page_called` being set to `True`)."""

    _results: deque[ReaderOutput]
    """Deque of unconsumed results. Rows are removed via popleft() as they
    are consumed, freeing memory incrementally."""

    _most_recent_results_fetch_time: float | None
    """`time.monotonic()` reading taken right after the most recent results fetch
    (a monotonic clock value, NOT wall-clock / epoch time). None until the first fetch.
    Used for result page fetch pacing."""

    _metrics: FetchMetrics
    """Metrics related to results fetching operations"""

    _execution_mode: ExecutionMode
    """The execution mode for this reader (snapshot vs streaming)."""

    _as_dict: bool
    """Whether rows are formatted as dicts (True) or tuples (False)."""

    _row_formatter: RowFormatter | None
    """Row formatter, created lazily by the first page fetch that returns rows."""

    def __init__(
        self,
        connection: Connection,
        statement: Statement,
        execution_mode: ExecutionMode,
        as_dict: bool = False,
    ):
        """Set up an empty reader; no request is made until results are first asked for.

        Args:
            connection: Connection used to fetch result pages.
            statement: Statement whose results are read.
            execution_mode: Snapshot vs streaming; selects blocking vs non-blocking fetches.
            as_dict: If True rows are formatted as dicts, otherwise as tuples.
        """
        self._connection = connection
        self._statement = statement
        self._execution_mode = execution_mode
        self._as_dict = as_dict

        self._next_page = None
        self._fetch_next_page_called = False
        self._most_recent_results_fetch_time = None

        self._results: deque[ReaderOutput] = deque()
        self._metrics = FetchMetrics()
        self._row_formatter: RowFormatter | None = None

    @abc.abstractmethod
    def _retain(self, op: Op, decoded: ResultTupleOrDict) -> None:
        """Retain the changelog row in the reader's internal state.

        This is used by ChangelogEventReader to retain the full changelog result,
        and by AppendOnlyResultReader to retain just the row data (after validating
        that the operation is an INSERT if provided by the server).

        The exact retention logic is left to the concrete implementations since it may differ
        based on whether we are retaining full changelog results or just row data.

        Args:
            op: The changelog operation reported for the row.
            decoded: The row after type conversion and tuple/dict formatting.
        """
        raise NotImplementedError("Abstract method")  # pragma: no cover

    def __iter__(self) -> ResultReader[ReaderOutput]:
        """Returns an iterator over the result reader results (the reader itself).

        Important: Iteration always uses blocking behavior, even in streaming mode.
        When the buffer is empty, iteration will fetch and wait for more data to
        become available. This differs from fetchone/fetchmany which are non-blocking
        in streaming mode.

        For streaming queries where non-blocking behavior is desired, use fetchone()
        or fetchmany() in a polling loop instead of iteration.
        """
        return self

    def fetchone(self) -> ReaderOutput | None:
        """
        Fetch the next row from the query result.

        Behavior depends on execution mode:
        - Snapshot mode: Blocking behavior, may fetch multiple pages to return a row
        - Streaming mode: Non-blocking, fetches at most one page per call

        Returns:
            A single row or None if no rows are available.
            In streaming mode, use may_have_results property to distinguish between
            temporary emptiness (more data may come) and end of results.

        Note: This non-blocking behavior in streaming mode differs from iteration.
        When iterating (for row in reader), the reader will block waiting
        for data. Use fetchone() in a polling loop for non-blocking streaming:

        Example:
            # Streaming mode polling pattern
            while reader.may_have_results:
                row = reader.fetchone()
                if row is not None:
                    process(row)
                else:
                    # No data right now, could sleep/yield control
                    time.sleep(0.1)
        """
        res = self._get_next_results(1)
        assert len(res) <= 1, "fetchone returned more than one result, this is probably a bug"
        # If no results are available, `res` is an empty list,
        # but we want to return None in this case: https://peps.python.org/pep-0249/#fetchone
        return res[0] if res else None

    @property
    def metrics(self) -> FetchMetrics:
        """Return the current metrics over results fetching activity."""
        return self._metrics

    def _consume_from_buffer(self, limit: int) -> list[ReaderOutput]:
        """
        Consume up to 'limit' results from the deque buffer.

        Uses popleft() to remove rows from the front of the deque. This is destructive
        consumption: once removed, rows cannot be re-accessed, and the buffer only ever
        holds rows that have not been handed out yet.

        Args:
            limit: Maximum number of results to consume from buffer.

        Returns:
            List of up to 'limit' results from the buffer (empty if the buffer is empty).
        """
        take = min(limit, len(self._results))
        popleft = self._results.popleft
        return [popleft() for _ in range(take)]

    def _get_next_results(self, limit: int | None) -> list[ReaderOutput]:
        """
        Retrieve up to `limit` results, with behavior depending on execution mode.

        This is the core result-fetching method that all public fetch methods
        delegate to. The behavior differs based on execution mode:

        - Snapshot mode: Uses blocking behavior for bounded result sets, fetching
          multiple pages if needed to satisfy the requested limit (traditional
          DB-API behavior).
        - Streaming mode: Non-blocking, makes at most one request to the server,
          returning whatever is available (suitable for polling).
        - Unlimited fetches (fetchall): Always blocking.

        This method performs no boundedness check of its own: with `limit=None` on an
        unbounded statement it would iterate indefinitely. fetchall() guards against
        that before delegating here.

        Args:
            limit: Maximum number of results to return, or None for all remaining.

        Returns:
            A list of results as produced by the concrete reader's `_retain()`.
        """
        if limit is None:
            # fetchall() - maintain blocking behavior to fetch everything
            return list(self)

        if self._execution_mode.is_snapshot:
            # Traditional blocking behavior: iterate, fetching as many pages as needed
            # to satisfy the limit.
            return list(islice(self, limit))

        # Streaming mode: non-blocking. Only go to the server (at most once) when nothing
        # is buffered and more pages may exist; then hand out whatever the buffer holds,
        # which may be nothing.
        if not self._results and not self._is_exhausted():
            self._fetch_next_page()
        return self._consume_from_buffer(limit)

    @property
    def may_have_results(self) -> bool:
        """Whether there may be results to fetch.

        True while rows are still buffered locally, or while the reader is not exhausted
        (no page fetched yet, or a next page is known). Defined in terms of
        `_is_exhausted()` so that polling loops and iteration always agree on when the
        results have ended, including when the next-page link is an empty string.
        """
        return bool(self._results) or not self._is_exhausted()

    def _is_exhausted(self) -> bool:
        """Whether we've fetched all available pages (no more pages to fetch)."""
        return self._fetch_next_page_called and not self._next_page

    def fetchmany(self, size: int) -> list[ReaderOutput]:
        """
        Fetch up to 'size' rows from the query result.

        Behavior depends on execution mode:
        - Snapshot mode: Blocking behavior, may fetch multiple pages to satisfy count
        - Streaming mode: Non-blocking, fetches at most one page per call and may
          return fewer rows than requested (including an empty list)

        Use may_have_results property to distinguish between temporary emptiness
        and end of results in streaming mode.

        Note: This non-blocking behavior in streaming mode differs from iteration.
        Direct iteration will block waiting for data.

        Example:
            # Streaming mode batch polling pattern
            while reader.may_have_results:
                batch = reader.fetchmany(100)
                if batch:
                    for row in batch:
                        process(row)
                else:
                    # No data available right now
                    time.sleep(0.1)

        Args:
            size: Maximum number of rows to return. Must be positive.

        Returns:
            List of 0 to 'size' rows, depending on what's available.
            In streaming mode, may return empty list even if more data will come.

        Raises:
            InterfaceError: If size is not positive.
        """
        if size <= 0:
            raise InterfaceError(f"size must be a positive integer, got {size}")

        return self._get_next_results(size)

    def fetchall(self) -> list[ReaderOutput]:
        """
        Fetch all remaining rows of a query result.

        This method will fetch all remaining pages from the server
        and accumulate them in memory.

        Warning:
            This downloads the entire remaining result set into memory.
            For large result sets, consider using iteration or `fetchmany()`
            to process results in batches.

        Returns:
            A list of all remaining results.
            Returns an empty list if no rows are available.

        Raises:
            NotSupportedError: If called on an unbounded streaming statement,
                since fetchall() would never complete.

        See Also:
            https://peps.python.org/pep-0249/#fetchall
        """
        # Only an explicit False rejects the call; any other value is let through.
        if self._statement.is_bounded is False:
            raise NotSupportedError(
                "Cannot call fetchall() on an unbounded streaming statement. "
                "Use fetchone(), fetchmany(), or iterate with a limit instead."
            )

        return self._get_next_results(None)

    def __next__(self) -> ReaderOutput:
        """Implementation of iterator protocol.

        If there are buffered results in the deque, return the next one immediately.

        This method implements blocking iteration behavior:
        - When the buffer is empty and we know there may be more data, it calls
          _fetch_next_page() to (try) to get it.
        - Raises StopIteration only when no more results will ever be available
        - In streaming mode, this means iteration will block/wait for new data

        Note: This blocking behavior differs from fetchone/fetchmany in streaming
        mode, which make at most one request and may return None/an empty list.
        """
        while not self._results:
            # Buffer is empty. Exhaustion is judged via _fetch_next_page_called and the
            # next-page link rather than buffer state, because destructive popleft()
            # consumption leaves an empty deque indistinguishable from "never fetched yet".
            if self._is_exhausted():
                raise StopIteration

            # Try to fetch the next page. In streaming scenarios this may return an empty
            # page if data hasn't arrived yet, in which case we loop and retry.
            #
            # (This is not a completely hard loop because _fetch_next_page() internally
            # pauses to avoid hammering the server with requests when data is not
            # currently available.)
            self._fetch_next_page()

        return self._results.popleft()

    def _fetch_next_page(self) -> None:
        """Try to fetch and process the next page of results.

        Sleeps first, if needed, so that consecutive fetches for this statement are at
        least `connection.statement_results_page_fetch_pause_secs` apart. This avoids hard
        loops and possible rate limiting when data isn't available yet in streaming
        scenarios.

        Each returned row is type-converted, formatted (tuple or dict) and handed to
        `_retain()`.

        Raises:
            InterfaceError: If the statement is not ready for result fetching, or has no
                schema (i.e. is not a query).
        """
        if not self._statement.can_fetch_results(self._execution_mode):
            raise InterfaceError("Statement is not ready for result fetching.")

        if not self._statement.schema:
            raise InterfaceError("Trying to fetch results for a non-query statement")

        if self._most_recent_results_fetch_time is not None:
            # Should we pause before fetching the next page?
            min_gap_secs = self._connection.statement_results_page_fetch_pause_secs
            elapsed_secs = time.monotonic() - self._most_recent_results_fetch_time
            if elapsed_secs < min_gap_secs:
                # Sleep only for the remainder of the configured gap, so the endpoint for
                # this statement is not hit more often than the configured pause time.
                pause_secs = min_gap_secs - elapsed_secs
                time.sleep(pause_secs)
                self._metrics.paused_before_fetch(pause_secs)

        self._metrics.prep_for_fetch()

        # Get (possibly empty) list of changelog row results from the server and the next
        # page link, if any, for the next fetch.
        results, next_page_link = self._connection._get_statement_results(
            self._statement.name, self._next_page
        )

        # Do the accounting that we did just fetch results
        self._fetch_next_page_called = True
        self._most_recent_results_fetch_time = time.monotonic()
        self._metrics.record_fetch_completion(len(results))
        self._next_page = next_page_link

        if not results:
            return  # Nothing to process; the fetch itself has already been accounted for

        # Capture the needed parts into local variables for faster access in the
        # per-result-row loop
        type_converter_to_python_row = self._statement.type_converter.to_python_row
        if self._row_formatter is None:
            # Lazily initialize formatter when we first need it (ensures schema is available)
            self._row_formatter = RowFormatter.create(self._as_dict, self._statement.schema)
        row_formatter_format = self._row_formatter.format
        retain = self._retain

        for res in results:
            # Promote row members from their JSON encoding to Python types
            decoded_row = type_converter_to_python_row(res.row)
            # Format row according to cursor's as_dict setting (tuple or dict)
            formatted_row = row_formatter_format(decoded_row)
            # Retain the row (and perhaps also the operation) in the reader's internal state
            retain(res.op, formatted_row)
598
+
599
+
600
class AppendOnlyResultReader(ResultReader[ResultTupleOrDict]):
    """Result reader for append-only statements.

    Buffers and returns plain rows -- tuples or dicts, depending on the `as_dict` flag --
    without the changelog operation.
    """

    def _retain(self, op: Op, decoded: ResultTupleOrDict) -> None:
        """Buffer the row data of one fetched changelog row.

        Only the row itself is kept, since that is all the fetch and iteration methods of
        this reader return. A row is accepted when its operation is INSERT or when no
        operation was supplied (`op` is None).

        Args:
            op: The changelog operation type.
            decoded: The row as a tuple or dict (per the `as_dict` flag), already converted
                from Results API JSON to Python types.

        Raises:
            NotSupportedError: If `op` is present and is anything other than INSERT.
        """
        if op is None or op == Op.INSERT:
            self._results.append(decoded)
            return

        # Anything but INSERT is unexpected for an append-only statement.
        logger.error(f"Received non-INSERT op {op} in results for append-only statement.")
        raise NotSupportedError(f"Non-INSERT op was received by AppendOnlyResultReader: {op}. ")
626
+
627
+
628
class ChangeloggedRow(NamedTuple):
    """One changelog event: the operation together with the row it applies to.

    This is the element type produced by ChangelogEventReader for non-append-only
    statements. The row has already been converted from Results API JSON to Python types.
    """

    op: Op
    """Changelog operation type attached to this row."""
    row: ResultTupleOrDict
    """Row payload, as a tuple or a dict depending on the `as_dict` flag, after conversion
    from Results API JSON to Python types."""
638
+
639
+
640
class ChangelogEventReader(ResultReader[ChangeloggedRow]):
    """Result reader for statements that are not append-only.

    Every result is a `ChangeloggedRow` namedtuple pairing the changelog operation (`op`)
    with the tuple or dict row data (`row`), for the subset of streaming statements where
    the operation must be returned along with each row.

    The events are passed through as received; no changelog interpretation happens here.
    """

    def _retain(self, op: Op, decoded: ResultTupleOrDict) -> None:
        """Buffer one changelog event, keeping the operation alongside its row.

        Args:
            op: The changelog operation type.
            decoded: The row as a tuple or dict (per the `as_dict` flag), already converted
                from Results API JSON to Python types.
        """
        event = ChangeloggedRow(op=op, row=decoded)
        self._results.append(event)