airbyte-cdk 6.54.10__py3-none-any.whl → 6.54.11__py3-none-any.whl
This diff compares the contents of two package versions published to one of the supported public registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in those registries.
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +64 -55
- airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -2
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/RECORD +8 -8
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -9,7 +9,7 @@ import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
-from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional

 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -66,8 +66,8 @@ class ConcurrentPerPartitionCursor(Cursor):
     _GLOBAL_STATE_KEY = "state"
     _PERPARTITION_STATE_KEY = "states"
     _IS_PARTITION_DUPLICATION_LOGGED = False
-
-
+    _PARENT_STATE = 0
+    _GENERATION_SEQUENCE = 1

     def __init__(
         self,
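Each value in `_partition_parent_state_map` is now a two-element tuple, and the new class constants name its positions instead of relying on bare indices. A hypothetical stand-alone illustration (the example values below are made up):

```python
# Index constants into the (parent_state, generation_sequence) tuples that
# _partition_parent_state_map stores after this change.
_PARENT_STATE = 0
_GENERATION_SEQUENCE = 1

entry = ({"parent_cursor": "2024-01-01T00:00:00Z"}, 7)  # made-up parent state and sequence
assert entry[_PARENT_STATE] == {"parent_cursor": "2024-01-01T00:00:00Z"}
assert entry[_GENERATION_SEQUENCE] == 7
```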
@@ -99,19 +99,29 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()

         # Parent-state tracking: store each partition’s parent state in creation order
-        self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] =
+        self._partition_parent_state_map: OrderedDict[str, tuple[Mapping[str, Any], int]] = (
+            OrderedDict()
+        )
+        self._parent_state: Optional[StreamState] = None
+
+        # Tracks when the last slice for partition is emitted
+        self._partitions_done_generating_stream_slices: set[str] = set()
+        # Used to track the index of partitions that are not closed yet
+        self._processing_partitions_indexes: List[int] = list()
+        self._generated_partitions_count: int = 0
+        # Dictionary to map partition keys to their index
+        self._partition_key_to_index: dict[str, int] = {}

-        self._finished_partitions: set[str] = set()
         self._lock = threading.Lock()
-        self._timer = Timer()
-        self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
-        self.
+        self._new_global_cursor: Optional[StreamState] = None
         self._number_of_partitions: int = 0
         self._use_global_cursor: bool = use_global_cursor
         self._partition_serializer = PerPartitionKeySerializer()
+
         # Track the last time a state message was emitted
         self._last_emission_time: float = 0.0
+        self._timer = Timer()

         self._set_initial_state(stream_state)

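Taken together, the new constructor fields act as a small generation ledger: every generated partition receives a monotonically increasing sequence number, the numbers of partitions still being processed are kept in ascending order, and a key-to-number map ties partition keys back to their sequence numbers. A minimal, self-contained sketch of that bookkeeping (illustrative names only, not CDK code):

```python
from typing import Dict, List, Optional

class PartitionLedger:
    """Toy model of the per-partition bookkeeping added in __init__."""

    def __init__(self) -> None:
        self.generated_count = 0             # next sequence number to hand out
        self.open_indexes: List[int] = []    # sequence numbers still in flight, ascending
        self.key_to_index: Dict[str, int] = {}

    def register(self, partition_key: str) -> int:
        """Assign the next sequence number to a newly generated partition."""
        seq = self.generated_count
        self.generated_count += 1
        self.open_indexes.append(seq)
        self.key_to_index[partition_key] = seq
        return seq

    def close(self, partition_key: str) -> None:
        """Drop the bookkeeping for a fully processed partition."""
        seq = self.key_to_index.pop(partition_key)
        self.open_indexes.remove(seq)

    def lowest_open(self) -> Optional[int]:
        """Sequence number of the oldest partition still in flight, if any."""
        return self.open_indexes[0] if self.open_indexes else None

ledger = PartitionLedger()
ledger.register("a")
ledger.register("b")
ledger.close("a")
print(ledger.lowest_open())  # 1 -> partition "b" still gates parent-state advancement
```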
@@ -157,60 +167,37 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._cursor_per_partition[partition_key].close_partition(partition=partition)
                 cursor = self._cursor_per_partition[partition_key]
                 if (
-                    partition_key in self.
+                    partition_key in self._partitions_done_generating_stream_slices
                     and self._semaphore_per_partition[partition_key]._value == 0
                 ):
                     self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])

+            # Clean up the partition if it is fully processed
+            self._cleanup_if_done(partition_key)
+
             self._check_and_update_parent_state()

             self._emit_state_message()

     def _check_and_update_parent_state(self) -> None:
-        """
-        Pop the leftmost partition state from _partition_parent_state_map only if
-        *all partitions* up to (and including) that partition key in _semaphore_per_partition
-        are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
-        Additionally, delete finished semaphores with a value of 0 to free up memory,
-        as they are only needed to track errors and completion status.
-        """
         last_closed_state = None

         while self._partition_parent_state_map:
-
-
-
-            # Verify ALL partitions from the left up to earliest_key are finished
-            all_left_finished = True
-            for p_key, sem in list(
-                self._semaphore_per_partition.items()
-            ):  # Use list to allow modification during iteration
-                # If any earlier partition is still not finished, we must stop
-                if p_key not in self._finished_partitions or sem._value != 0:
-                    all_left_finished = False
-                    break
-                # Once we've reached earliest_key in the semaphore order, we can stop checking
-                if p_key == earliest_key:
-                    break
-
-            # If the partitions up to earliest_key are not all finished, break the while-loop
-            if not all_left_finished:
-                break
+            earliest_key, (candidate_state, candidate_seq) = next(
+                iter(self._partition_parent_state_map.items())
+            )

-            #
-
-
+            # if any partition that started <= candidate_seq is still open, we must wait
+            if (
+                self._processing_partitions_indexes
+                and self._processing_partitions_indexes[0] <= candidate_seq
+            ):
+                break

-            #
-
-
-            if p_key in self._finished_partitions and sem._value == 0:
-                del self._semaphore_per_partition[p_key]
-                logger.debug(f"Deleted finished semaphore for partition {p_key} with value 0")
-            if p_key == earliest_key:
-                break
+            # safe to pop
+            self._partition_parent_state_map.popitem(last=False)
+            last_closed_state = candidate_state

-        # Update _parent_state if we popped at least one partition
         if last_closed_state is not None:
             self._parent_state = last_closed_state

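The rewritten `_check_and_update_parent_state` replaces the semaphore scan with a watermark check: a queued parent state may be committed only once every partition generated at or before it has finished. A rough stand-alone model of that gate, assuming (as the ledger sketch above guarantees) that the open sequence numbers are kept in ascending order:

```python
from collections import OrderedDict
from typing import Any, List, Mapping, Optional, Tuple

def advance_parent_state(
    parent_state_map: "OrderedDict[str, Tuple[Mapping[str, Any], int]]",
    processing_indexes: List[int],
) -> Optional[Mapping[str, Any]]:
    """Pop queued parent states whose generation sequence is older than every
    still-open partition; return the last one popped, i.e. the state to commit."""
    last_closed = None
    while parent_state_map:
        _, (candidate_state, candidate_seq) = next(iter(parent_state_map.items()))
        # An earlier (or equal) partition is still open -> cannot commit yet.
        if processing_indexes and processing_indexes[0] <= candidate_seq:
            break
        parent_state_map.popitem(last=False)
        last_closed = candidate_state
    return last_closed

queue = OrderedDict({"p0": ({"cursor": 1}, 0), "p1": ({"cursor": 2}, 1)})
print(advance_parent_state(queue, processing_indexes=[1]))  # {'cursor': 1}; seq 1 is still open
```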
@@ -289,18 +276,24 @@ class ConcurrentPerPartitionCursor(Cursor):
             if not self._IS_PARTITION_DUPLICATION_LOGGED:
                 logger.warning(f"Partition duplication detected for stream {self._stream_name}")
                 self._IS_PARTITION_DUPLICATION_LOGGED = True
+            return
         else:
             self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

         with self._lock:
+            seq = self._generated_partitions_count
+            self._generated_partitions_count += 1
+            self._processing_partitions_indexes.append(seq)
+            self._partition_key_to_index[partition_key] = seq
+
             if (
                 len(self._partition_parent_state_map) == 0
                 or self._partition_parent_state_map[
                     next(reversed(self._partition_parent_state_map))
-                ]
+                ][self._PARENT_STATE]
                 != parent_state
             ):
-                self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
+                self._partition_parent_state_map[partition_key] = (deepcopy(parent_state), seq)

         for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
             cursor.stream_slices(),
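On the registration side, the sequence number is taken under the lock and the parent state is queued only when it differs from the most recently queued entry, so a run of partitions sharing the same parent state adds a single entry to the map. A small sketch of that de-duplication rule (illustrative function, not the CDK's):

```python
from collections import OrderedDict
from copy import deepcopy
from typing import Any, Mapping, Tuple

_PARENT_STATE = 0  # same tuple-index convention as above

def queue_parent_state(
    state_map: "OrderedDict[str, Tuple[Mapping[str, Any], int]]",
    partition_key: str,
    parent_state: Mapping[str, Any],
    seq: int,
) -> None:
    # Append only when the parent state changed since the last queued entry.
    if (
        len(state_map) == 0
        or state_map[next(reversed(state_map))][_PARENT_STATE] != parent_state
    ):
        state_map[partition_key] = (deepcopy(parent_state), seq)

state_map: "OrderedDict[str, Tuple[Mapping[str, Any], int]]" = OrderedDict()
queue_parent_state(state_map, "p0", {"cursor": 1}, 0)
queue_parent_state(state_map, "p1", {"cursor": 1}, 1)  # unchanged parent -> not queued
queue_parent_state(state_map, "p2", {"cursor": 2}, 2)
print(list(state_map))  # ['p0', 'p2']
```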
@@ -308,7 +301,7 @@ class ConcurrentPerPartitionCursor(Cursor):
         ):
             self._semaphore_per_partition[partition_key].release()
             if is_last_slice:
-                self.
+                self._partitions_done_generating_stream_slices.add(partition_key)
             yield StreamSlice(
                 partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
             )
@@ -338,14 +331,11 @@ class ConcurrentPerPartitionCursor(Cursor):
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
-                    if partition_key in self.
-                        partition_key not in self._semaphore_per_partition
-                        or self._semaphore_per_partition[partition_key]._value == 0
-                    ):
+                    if partition_key not in self._partition_key_to_index:
                         oldest_partition = self._cursor_per_partition.pop(
                             partition_key
                         )  # Remove the oldest partition
-                        logger.
+                        logger.debug(
                             f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
                         )
                         break
@@ -474,6 +464,25 @@ class ConcurrentPerPartitionCursor(Cursor):
         ):
             self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}

+    def _cleanup_if_done(self, partition_key: str) -> None:
+        """
+        Free every in-memory structure that belonged to a completed partition:
+            cursor, semaphore, flag inside `_finished_partitions`
+        """
+        if not (
+            partition_key in self._partitions_done_generating_stream_slices
+            and self._semaphore_per_partition[partition_key]._value == 0
+        ):
+            return
+
+        self._semaphore_per_partition.pop(partition_key, None)
+        self._partitions_done_generating_stream_slices.discard(partition_key)
+
+        seq = self._partition_key_to_index.pop(partition_key)
+        self._processing_partitions_indexes.remove(seq)
+
+        logger.debug(f"Partition {partition_key} fully processed and cleaned up.")
+
     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)

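`_cleanup_if_done` tears a partition down only when both conditions hold: its last slice has been generated and its semaphore has drained back to zero, meaning every slice released during generation has since been closed. A toy illustration of that completion test, assuming one semaphore release per generated slice and one acquire per closed slice (as the surrounding cursor code appears to do):

```python
import threading
from typing import Set

# Toy model (not CDK code) of the "fully processed" test behind _cleanup_if_done.
sem = threading.Semaphore(0)
done_generating: Set[str] = set()

def emit_slice(key: str, is_last: bool) -> None:
    sem.release()                      # one release per generated slice
    if is_last:
        done_generating.add(key)       # last slice for this partition was emitted

def close_slice(key: str) -> bool:
    sem.acquire()                      # assumed: one acquire per closed slice
    # Fully processed: generation finished and no outstanding slices remain.
    return key in done_generating and sem._value == 0

emit_slice("p1", is_last=False)
emit_slice("p1", is_last=True)
print(close_slice("p1"))  # False: one generated slice is still open
print(close_slice("p1"))  # True: safe to drop the partition's bookkeeping
```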
airbyte_cdk/sources/declarative/interpolation/jinja.py

@@ -147,16 +147,18 @@ class JinjaInterpolation(Interpolation):
             # It can be returned as is
             return s

+    @staticmethod
     @cache
-    def _find_undeclared_variables(
+    def _find_undeclared_variables(s: Optional[str]) -> Set[str]:
         """
         Find undeclared variables and cache them
         """
         ast = _ENVIRONMENT.parse(s)  # type: ignore # parse is able to handle None
         return meta.find_undeclared_variables(ast)

+    @staticmethod
     @cache
-    def _compile(
+    def _compile(s: str) -> Template:
         """
         We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
         """
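The `jinja.py` change moves `@cache` onto static methods. With `functools.cache` on an instance method, `self` is part of the cache key, so every `JinjaInterpolation` instance accumulates its own entries and is kept alive by the cache; keyed on the template string alone, one entry serves all callers. A minimal illustration of the difference, using stand-in class names rather than the CDK's:

```python
from functools import cache

CALLS = {"bound": 0, "static": 0}

class BoundCacheExample:
    @cache
    def parse(self, s: str) -> str:
        # Cache key is (self, s): each instance gets its own entry, and the
        # cache holds a strong reference to that instance.
        CALLS["bound"] += 1
        return s.upper()

class StaticCacheExample:
    @staticmethod
    @cache
    def parse(s: str) -> str:
        # Cache key is s alone: one shared entry, no instances retained.
        CALLS["static"] += 1
        return s.upper()

a, b = BoundCacheExample(), BoundCacheExample()
a.parse("{{ x }}")
b.parse("{{ x }}")
print(CALLS["bound"])    # 2 -> recomputed per instance

StaticCacheExample.parse("{{ x }}")
StaticCacheExample.parse("{{ x }}")
print(CALLS["static"])   # 1 -> computed once and shared
```

Note the decorator order in the diff: `@staticmethod` sits above `@cache`, so the plain function is cached first and the cached wrapper is then exposed as a static attribute.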
{airbyte_cdk-6.54.10.dist-info → airbyte_cdk-6.54.11.dist-info}/RECORD

@@ -111,7 +111,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=vCpwX1PVRFP
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
 airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
 airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
-airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=
+airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=araWk039M89c6lQHEUltfM1VI_xGw9gZIDXRWWF6SkM,22591
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
 airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=2tsE6FgXzemf4fZZ4uGtd8QpRBl9GJ2CRqSNJE5p0EI,16077
@@ -125,7 +125,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=h36
 airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=myVaNtFqxOAwrbp93rgd1dhkqyuvXvET9rsimQ89ktc,1873
 airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkHqGlfa87G6VYMtBAQWin7ECKpfMdrDcg0JO5_rhc,3212
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
-airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=
+airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=oFGKs3oX0xO6DOL4E9x8rhxwbEoRcgx4HJVIL1RQ9c4,7269
 airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=xRcmjape4_WGmKMJpmBsKY0k4OHJDM46Hv3V-dlSz3w,5640
 airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=ciXtM7Qhus170ZwP8B9Ac4VScX2FPBYvlbZRv_r376U,24692
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -421,9 +421,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.54.
-airbyte_cdk-6.54.
-airbyte_cdk-6.54.
-airbyte_cdk-6.54.
-airbyte_cdk-6.54.
-airbyte_cdk-6.54.
+airbyte_cdk-6.54.11.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.54.11.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+airbyte_cdk-6.54.11.dist-info/METADATA,sha256=GAc-NcvDVzTVP4sUrNphva1hP0kyZt-ANEM5Qz5CgPY,6344
+airbyte_cdk-6.54.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.54.11.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
+airbyte_cdk-6.54.11.dist-info/RECORD,,