confluent-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
"""
Confluent SQL - DB-API v2 compliant driver for Confluent SQL.

This module provides a DB-API v2 compliant interface for connecting to and
executing SQL queries against Confluent SQL services.
"""

from .changelog_compressor import ChangelogCompressor
from .connection import Connection, connect
from .cursor import Cursor
from .exceptions import (
    ComputePoolExhaustedError,
    DatabaseError,
    DataError,
    Error,
    IntegrityError,
    InterfaceError,
    InternalError,
    NotSupportedError,
    OperationalError,
    ProgrammingError,
    StatementDeletedError,
    StatementStoppedError,
    TypeMismatchError,
    Warning,
)
from .execution_mode import ExecutionMode
from .result_readers import ChangeloggedRow
from .statement import Op
from .types import SqlNone, YearMonthInterval

# DB-API v2 module globals (PEP 249)
apilevel = "2.0"
threadsafety = 1  # Threads may share the module but not connections
paramstyle = "pyformat"  # Python extended format codes, e.g. %(name)s

__all__ = [
    "connect",
    "Connection",
    "Cursor",
    "ExecutionMode",
    "Op",
    "ChangeloggedRow",
    "ChangelogCompressor",
    "Warning",
    "Error",
    "InterfaceError",
    "DatabaseError",
    "DataError",
    "OperationalError",
    "ComputePoolExhaustedError",
    "StatementStoppedError",
    "StatementDeletedError",
    "IntegrityError",
    "InternalError",
    "ProgrammingError",
    "NotSupportedError",
    "TypeMismatchError",
    "apilevel",
    "threadsafety",
    "paramstyle",
    "SqlNone",
    "YearMonthInterval",
]
@@ -0,0 +1,10 @@
1
"""Version information for the confluent_sql package."""

from importlib.metadata import PackageNotFoundError, version


def _resolve_version() -> str:
    """Return the installed package version, or a development fallback.

    The canonical version lives in the installed package metadata
    (pyproject.toml). When the package metadata cannot be found — e.g. a
    plain source checkout that was never installed — a sentinel dev
    version is returned instead.
    """
    try:
        return version("confluent-sql")
    except PackageNotFoundError:
        return "0.0.0+dev"


VERSION = _resolve_version()
@@ -0,0 +1,603 @@
1
+ """
2
+ Changelog state management and compression for streaming non-append-only queries.
3
+
4
+ This module provides the RECOMMENDED high-level interface for client code working with
5
+ streaming non-append-only Flink statements (e.g., GROUP BY, JOIN). Instead of manually
6
+ processing raw changelog events (INSERT, UPDATE_BEFORE/AFTER, DELETE), clients should use
7
+ a ChangelogCompressor to automatically maintain a logical result set that reflects the
8
+ current state over time.
9
+
10
+ Usage:
11
+ cursor = conn.streaming_cursor()
12
+ cursor.execute("SELECT first_letter, COUNT(*) FROM users GROUP BY first_letter")
13
+ compressor = cursor.changelog_compressor()
14
+
15
+ # Iterate over snapshots until the query is stopped
16
+ for snapshot in compressor.snapshots():
17
+ process(snapshot)
18
+ time.sleep(5) # Optional: wait between polls
19
+
20
+ # Generator exits when query is externally stopped/deleted or fails
21
+
22
+ Compressors consume raw changelog events from ChangelogEventReader (via the cursor)
23
+ and apply operations to maintain the compressed result set. Storage strategies are
24
+ automatically selected based on whether the statement has upsert columns (dict-based
25
+ keyed lookup vs list-based scanning).
26
+
27
+ For low-level changelog fetching without state management, see the `result_readers` module.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import abc
33
+ import copy
34
+ import logging
35
+ from collections.abc import Generator
36
+ from typing import TYPE_CHECKING, cast
37
+
38
+ from .exceptions import InterfaceError, StatementStoppedError
39
+ from .result_readers import ChangeloggedRow, ResultTupleOrDict
40
+ from .statement import Op, Schema, Statement
41
+
42
+ if TYPE_CHECKING:
43
+ from .cursor import Cursor
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
def create_changelog_compressor(cursor: Cursor, statement: Statement) -> ChangelogCompressor:
    """Create the changelog compressor suited to the given statement.

    Chooses the concrete compressor class from the statement's traits:
    statements with upsert columns get dict-backed keyed storage, all others
    get list-backed storage with linear matching. Whether rows arrive as
    tuples or dicts is decided by the result reader layer and is transparent
    to either compressor.

    Args:
        cursor: The cursor to fetch changelog data from.
        statement: The statement associated with the cursor.

    Returns:
        An appropriate ChangelogCompressor instance.

    Raises:
        InterfaceError: If the cursor is not configured for changelog results.
    """
    if not cursor.returns_changelog:
        raise InterfaceError(
            "Changelog compressor can only be created for streaming non-append-only queries. "
            "This query does not return changelog results."
        )

    # Storage strategy hinges solely on the presence of upsert columns.
    traits = statement.traits
    compressor_cls = (
        UpsertColumnsCompressor
        if traits and traits.upsert_columns
        else NoUpsertColumnsCompressor
    )
    return compressor_cls(cursor, statement)
79
+
80
+
81
class ChangelogCompressor(abc.ABC):
    """Abstract base class for changelog compressors.

    Compressors accumulate changelog operations and maintain a logical result set
    that changes over time based on INSERT, UPDATE_BEFORE/AFTER, and DELETE operations.

    Subclasses provide the storage strategy (see the abstract hooks below);
    this base class owns the fetch/apply/snapshot loop and batch-size handling.
    """

    _cursor: Cursor
    """The cursor to fetch changelog data from."""

    _statement: Statement
    """The statement associated with the cursor."""

    _upsert_columns: list[int] | None
    """Zero-based indices of upsert columns from the statement traits, if any."""

    _schema: Schema
    """The schema of the result set."""

    def __init__(self, cursor: Cursor, statement: Statement):
        """Initialize the compressor with a cursor and statement.

        Args:
            cursor: The cursor to fetch changelog data from.
            statement: The statement associated with the cursor.

        Raises:
            InterfaceError: If the cursor does not return changelog results or
                if the statement does not have a schema.
        """
        self._cursor = cursor
        self._statement = statement

        # Validate this is a changelog query
        if not cursor.returns_changelog:
            raise InterfaceError(
                "ChangelogCompressor can only be created for streaming non-append-only queries"
            )

        # Validate statement has a schema
        if not statement.schema:
            raise InterfaceError("ChangelogCompressor requires a statement with a schema")

        # Get statement info we need
        self._upsert_columns = statement.traits.upsert_columns if statement.traits else None
        self._schema = statement.schema

    @abc.abstractmethod
    def _apply_operation(self, op: Op, row: ResultTupleOrDict) -> None:
        """Apply a changelog operation to the internal state.

        Args:
            op: The changelog operation.
            row: The row data.
        """
        ...

    @abc.abstractmethod
    def _copy_accumulated_rows(self) -> list[ResultTupleOrDict]:
        """Return a deep copy of the accumulated rows from internal storage.

        Returns:
            A deep copy list of the current logical result set.
        """
        ...

    @abc.abstractmethod
    def _has_pending_update(self) -> bool:
        """Check if there's a pending UPDATE_BEFORE awaiting UPDATE_AFTER.

        Returns:
            True if a pending update is in progress, False otherwise.
        """
        ...

    @abc.abstractmethod
    def _clear_storage(self) -> None:
        """Clear internal row storage."""
        ...

    @abc.abstractmethod
    def _clear_pending_update(self) -> None:
        """Clear pending update tracking state."""
        ...

    def _validate_no_pending_update(self, op: Op, row: ResultTupleOrDict) -> None:
        """Raise if we have a pending update and aren't processing UPDATE_AFTER.

        Args:
            op: The changelog operation being processed.
            row: The row data.

        Raises:
            InterfaceError: If a pending UPDATE_BEFORE exists while processing a
                non-UPDATE_AFTER operation.
        """
        if op != Op.UPDATE_AFTER and self._has_pending_update():
            raise InterfaceError(f"Received {op.name} while an UPDATE_BEFORE is pending: {row}")

    def _resolve_batchsize(self, fetch_batchsize: int | None) -> int:
        """Resolve and validate the batch size to use for fetching.

        Args:
            fetch_batchsize: Explicit batch size, or None to use cursor.arraysize.

        Returns:
            The resolved batch size as a positive integer.

        Raises:
            InterfaceError: If fetch_batchsize is not a positive int.
        """
        # Validate explicit batch size parameter if provided
        if fetch_batchsize is not None:
            # Reject non-int values (including bool) even if they happen to compare or cast
            # (bool is a subclass of int, so it must be checked explicitly first)
            if isinstance(fetch_batchsize, bool) or not isinstance(fetch_batchsize, int):
                raise InterfaceError(
                    f"fetch_batchsize must be an int, got {type(fetch_batchsize).__name__}"
                )
            if fetch_batchsize <= 0:
                raise InterfaceError(f"fetch_batchsize must be positive, got {fetch_batchsize}")
            return fetch_batchsize

        # Fall back to cursor.arraysize (which is guaranteed valid by its property setter)
        return self._cursor.arraysize

    def snapshots(
        self, fetch_batchsize: int | None = None
    ) -> Generator[list[ResultTupleOrDict], None, None]:
        """Generator that yields snapshots of the accumulated result set until the query stops.

        This generator continuously polls for new changelog events, applies them to the internal
        state, and yields self-consistent snapshots of the accumulated result set. It automatically
        terminates when the streaming query enters a terminal state and all results have been
        consumed.

        Each iteration fetches ALL currently available changelog events from the cursor (until
        fetchmany returns an empty list), applies them to the internal state, and yields a
        self-consistent snapshot.

        **Self-Consistency**: A snapshot is considered self-consistent when all currently
        available changelog events have been consumed and applied. This means the snapshot
        reflects a coherent state with no pending UPDATE_BEFORE operations awaiting their
        matching UPDATE_AFTER.

        **No Guarantee of Logical Changes**: There is NO guarantee that consecutive snapshots
        will differ. If no new changelog events arrived since the prior yield, the snapshot
        will be logically identical to the previous one. Additionally, even if events were
        processed, the logical result set may remain unchanged (e.g., an INSERT followed
        immediately by a DELETE of the same row).

        **Return Value**: Each yielded snapshot is a deep copy of the accumulated rows. This
        ensures that modifications to the snapshot will not affect the compressor's internal
        state. The caller is free to mutate the yielded snapshots.

        **Termination**: The generator raises exceptions when the statement stops:
        - StatementStoppedError: Raised when cursor.may_have_results becomes False,
          indicating the statement entered a terminal phase (STOPPED, FAILED, COMPLETED).
          The exception includes the Statement object for inspection of why it stopped.
        - StatementDeletedError: A subclass of StatementStoppedError raised specifically
          when the statement is deleted (404 response). This is a distinct error case
          from normal stopping.

        Since streaming queries run indefinitely, any termination is exceptional and
        warrants an exception rather than silent StopIteration.

        Args:
            fetch_batchsize: The batch size to use for fetching, or None to use cursor.arraysize.

        Yields:
            Deep copies of the accumulated logical result set after consuming all currently
            available changelog events.

        Example:
            >>> compressor = cursor.changelog_compressor()
            >>> for snapshot in compressor.snapshots():
            ...     process(snapshot)
            ...     time.sleep(5)  # Optional: wait between polls
            >>> # Generator exits when query is stopped/deleted or fails
            >>> print("Streaming query stopped")
        """
        # Resolve batch size once to ensure consistent behavior across yields
        batchsize = self._resolve_batchsize(fetch_batchsize)

        while True:
            if not self._cursor.may_have_results:
                # Statement stopped unexpectedly - raise exception with context.
                # statement may be None here, hence the defensive attribute access.
                statement = self._cursor.statement
                statement_name = statement.name if statement else "unknown"
                phase_info = statement.phase if statement else None
                phase_suffix = (
                    f" (phase: {statement.phase})" if statement and statement.phase else ""
                )
                message = (
                    f"Streaming statement '{statement_name}' stopped unexpectedly{phase_suffix}"
                )
                raise StatementStoppedError(
                    message,
                    statement_name=statement_name,
                    statement=statement,
                    phase=phase_info,
                )

            # Fetch and apply all available events, then yield snapshot
            # Pass resolved batchsize (int) so get_current_snapshot() won't re-read cursor.arraysize
            yield self.get_current_snapshot(batchsize)

    def get_current_snapshot(self, fetch_batchsize: int | None = None) -> list[ResultTupleOrDict]:
        """Fetch all currently available changelog events and return current snapshot.

        This method fetches ALL currently available changelog events from the cursor (until
        fetchmany returns an empty list), applies them to the internal state via
        _apply_operation(), and returns a deep copy of the accumulated result set.

        Unlike snapshots(), this method:
        - Does NOT check cursor.may_have_results (caller's responsibility)
        - Does NOT raise StatementStoppedError
        - Returns a single snapshot rather than yielding indefinitely
        - Is non-blocking - returns immediately after consuming available events

        **Self-Consistency**: The returned snapshot is self-consistent, meaning all currently
        available changelog events have been consumed and applied. No pending UPDATE_BEFORE
        operations remain without their matching UPDATE_AFTER.

        **Deep Copy**: The returned snapshot is a deep copy. Mutations will not affect the
        compressor's internal state.

        **Idempotency**: If called when no new events are available, returns the current
        state unchanged. Multiple consecutive calls with no new events will return
        logically identical snapshots.

        Args:
            fetch_batchsize: The batch size for fetching, or None to use cursor.arraysize.

        Returns:
            A deep copy of the accumulated result set after consuming all currently
            available changelog events.

        Example:
            >>> compressor = cursor.changelog_compressor()
            >>> while cursor.may_have_results:
            ...     snapshot = compressor.get_current_snapshot()
            ...     process(snapshot)
            ...     time.sleep(5)
        """
        # Resolve batch size
        batchsize = self._resolve_batchsize(fetch_batchsize)

        # Fetch all currently available events
        while True:
            batch = self._cursor.fetchmany(batchsize)
            if not batch:
                break

            for changelogged_row in batch:
                # Must cast because cursor.fetchmany() returns list[ResultRow],
                # but if using a ChangelogCompressor, we know the rows are actually
                # ChangeloggedRow consisting of (Op, ResultTupleOrDict).
                op, row = cast(ChangeloggedRow, changelogged_row)
                self._apply_operation(op, cast(ResultTupleOrDict, row))

        # Return current snapshot
        return self._copy_accumulated_rows()

    def close(self) -> None:
        """Close the compressor and release resources.

        This method closes the underlying cursor and clears any internal state.
        After calling close(), the compressor should not be used anymore.
        """
        # Clear internal state first, then close the cursor.
        self._clear_storage()
        self._clear_pending_update()
        self._cursor.close()
353
+
354
+
355
class UpsertColumnsCompressor(ChangelogCompressor):
    """Changelog compressor backed by a key-indexed dict, for statements with upsert columns.

    Rows are stored in a dict keyed by the tuple of upsert-column values, giving
    O(1) lookups. Insertion order is preserved (dicts keep it in Python 3.7+),
    so snapshots come back in arrival order.

    An UPDATE_BEFORE is expected to be immediately followed by its matching
    UPDATE_AFTER; at most one pending update is tracked at a time.

    Rows may arrive as tuples or dicts (decided by cursor.as_dict in the result
    reader layer); both formats are handled transparently.
    """

    # Zero-based indices of the columns that form the upsert key.
    _upsert_column_indices: list[int]

    # Names of those same columns, precomputed for dict-formatted rows.
    _upsert_key_column_names: list[str]

    # Key tuple -> row data. Insertion order doubles as snapshot order.
    _rows_by_key: dict[tuple, ResultTupleOrDict]

    # Set after an UPDATE_BEFORE; reset by the matching UPDATE_AFTER.
    _expecting_update_after: bool

    def __init__(self, cursor: Cursor, statement: Statement):
        """Set up dict storage and precompute the upsert key columns.

        Args:
            cursor: The cursor to fetch changelog data from.
            statement: The statement associated with the cursor.

        Raises:
            InterfaceError: If the statement does not have upsert columns.
        """
        super().__init__(cursor, statement)

        traits = statement.traits
        if not traits or not traits.upsert_columns:
            raise InterfaceError("UpsertColumnsCompressor requires a statement with upsert columns")

        self._upsert_column_indices = traits.upsert_columns

        # Resolve the key columns' names once so dict rows can be keyed cheaply.
        self._upsert_key_column_names = [
            self._schema.columns[idx].name for idx in self._upsert_column_indices
        ]

        self._rows_by_key = {}
        self._expecting_update_after = False

    def _extract_key(self, row: ResultTupleOrDict) -> tuple:
        """Build the upsert-key tuple for a row in either format.

        Dict rows are keyed by the precomputed column names; tuple rows by the
        raw column indices. Which format arrives is fixed by cursor.as_dict
        and guaranteed by the result reader layer.

        Args:
            row: The row data, either a tuple or a dict.

        Returns:
            A tuple of the key values in column order.
        """
        if isinstance(row, dict):
            return tuple(row[name] for name in self._upsert_key_column_names)
        return tuple(row[idx] for idx in self._upsert_column_indices)

    def _has_pending_update(self) -> bool:
        """Return True while an UPDATE_BEFORE awaits its UPDATE_AFTER."""
        return self._expecting_update_after

    def _clear_storage(self) -> None:
        """Drop all accumulated rows."""
        self._rows_by_key.clear()

    def _clear_pending_update(self) -> None:
        """Forget any in-flight UPDATE_BEFORE."""
        self._expecting_update_after = False

    def _require_known_key(self, op: Op, key: tuple) -> None:
        """Raise InterfaceError unless *key* exists in the current state.

        Args:
            op: The operation being applied (used in the error message).
            key: The extracted upsert key.

        Raises:
            InterfaceError: If the key is absent from the accumulated state.
        """
        if key not in self._rows_by_key:
            raise InterfaceError(
                f"Received {op.name} for a key that does not exist in current state: {key}"
            )

    def _apply_operation(self, op: Op, row: ResultTupleOrDict) -> None:
        """Fold one changelog event into the keyed state.

        Args:
            op: The changelog operation.
            row: The row data.
        """
        self._validate_no_pending_update(op, row)

        key = self._extract_key(row)

        if op == Op.INSERT:
            # A new key lands at the end of the dict, so iteration in
            # _copy_accumulated_rows() preserves insertion order.
            self._rows_by_key[key] = row

        elif op == Op.UPDATE_BEFORE:
            self._require_known_key(op, key)
            # Arm the single-slot pending-update latch; UPDATE_AFTER must follow.
            self._expecting_update_after = True

        elif op == Op.UPDATE_AFTER:
            # A preceding UPDATE_BEFORE is optional here: the key alone
            # identifies which row to overwrite.
            self._require_known_key(op, key)
            self._rows_by_key[key] = row
            self._expecting_update_after = False

        elif op == Op.DELETE:
            self._require_known_key(op, key)
            del self._rows_by_key[key]

    def _copy_accumulated_rows(self) -> list[ResultTupleOrDict]:
        """Return deep copies of the stored rows, in insertion order."""
        return [copy.deepcopy(stored) for stored in self._rows_by_key.values()]
491
+
492
+
493
class NoUpsertColumnsCompressor(ChangelogCompressor):
    """Changelog compressor using plain list storage, for statements without upsert columns.

    With no key available, rows are matched by whole-row equality via a linear
    scan (newest match first). Equality comparison works identically for tuple
    and dict rows (the format is chosen by cursor.as_dict), so both are
    supported transparently.

    An UPDATE_BEFORE is expected to be immediately followed by its matching
    UPDATE_AFTER; at most one pending update is tracked at a time.
    """

    # Accumulated rows in arrival order; matched by linear scan.
    _rows: list[ResultTupleOrDict]

    # Index recorded by the most recent UPDATE_BEFORE, awaiting UPDATE_AFTER;
    # None when no update is pending.
    _pending_update_position: int | None

    def __init__(self, cursor: Cursor, statement: Statement):
        """Set up empty list storage.

        Args:
            cursor: The cursor to fetch changelog data from.
            statement: The statement associated with the cursor.
        """
        super().__init__(cursor, statement)
        self._rows = []
        self._pending_update_position = None

    def _has_pending_update(self) -> bool:
        """Return True while an UPDATE_BEFORE awaits its UPDATE_AFTER."""
        return self._pending_update_position is not None

    def _clear_storage(self) -> None:
        """Drop all accumulated rows."""
        self._rows.clear()

    def _clear_pending_update(self) -> None:
        """Forget any in-flight UPDATE_BEFORE."""
        self._pending_update_position = None

    def _find_row_position(self, row: ResultTupleOrDict, operation: Op) -> int:
        """Locate a matching row, scanning newest-first.

        Args:
            row: The row to find.
            operation: The operation being applied, used in the error message.

        Returns:
            The index of the most recently added row equal to *row*.

        Raises:
            InterfaceError: If no stored row equals *row*.
        """
        # Walk the list back-to-front so the most recent duplicate wins.
        for position in reversed(range(len(self._rows))):
            if self._rows[position] == row:
                return position

        raise InterfaceError(
            f"Received {operation.name} for a row that does not exist in current state: {row}"
        )

    def _apply_operation(self, op: Op, row: ResultTupleOrDict) -> None:
        """Fold one changelog event into the list state.

        Args:
            op: The changelog operation.
            row: The row data.
        """
        self._validate_no_pending_update(op, row)

        if op == Op.INSERT:
            self._rows.append(row)

        elif op == Op.UPDATE_BEFORE:
            # Remember which slot the matching UPDATE_AFTER must overwrite.
            # (_find_row_position raises InterfaceError if the row is absent.)
            self._pending_update_position = self._find_row_position(row, Op.UPDATE_BEFORE)

        elif op == Op.UPDATE_AFTER:
            # UPDATE_BEFORE is mandatory here: without upsert columns, the
            # changed row content cannot identify its predecessor on its own.
            if self._pending_update_position is None:
                raise InterfaceError(
                    f"Received UPDATE_AFTER without a preceding UPDATE_BEFORE: {row}"
                )

            self._rows[self._pending_update_position] = row
            self._pending_update_position = None

        elif op == Op.DELETE:
            # _find_row_position raises InterfaceError if the row is absent.
            del self._rows[self._find_row_position(row, Op.DELETE)]

    def _copy_accumulated_rows(self) -> list[ResultTupleOrDict]:
        """Return a deep copy of the stored rows."""
        return copy.deepcopy(self._rows)