airbyte-cdk 6.23.0__py3-none-any.whl → 6.23.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +333 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +86 -15
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
- {airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/RECORD +14 -13
- {airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py:

```diff
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
+from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
+    PerPartitionWithGlobalCursor,
+)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -32,7 +35,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -231,21 +234,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         stream_state=stream_state,
                     )
 
-                    retriever = declarative_stream.retriever
-
-                    # This is an optimization so that we don't invoke any cursor or state management flows within the
-                    # low-code framework because state management is handled through the ConcurrentCursor.
-                    if declarative_stream and isinstance(retriever, SimpleRetriever):
-                        # Also a temporary hack. In the legacy Stream implementation, as part of the read,
-                        # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
-                        # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-                        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-                        # still rely on a DatetimeBasedCursor that is properly initialized with state.
-                        if retriever.cursor:
-                            retriever.cursor.set_initial_state(stream_state=stream_state)
-                        # We zero it out here, but since this is a cursor reference, the state is still properly
-                        # instantiated for the other components that reference it
-                        retriever.cursor = None
+                    retriever = self._get_retriever(declarative_stream, stream_state)
 
                     partition_generator = StreamSlicerPartitionGenerator(
                         DeclarativePartitionFactory(
@@ -305,6 +294,60 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             cursor=final_state_cursor,
                         )
                     )
+                elif (
+                    incremental_sync_component_definition
+                    and incremental_sync_component_definition.get("type", "")
+                    == DatetimeBasedCursorModel.__name__
+                    and self._stream_supports_concurrent_partition_processing(
+                        declarative_stream=declarative_stream
+                    )
+                    and hasattr(declarative_stream.retriever, "stream_slicer")
+                    and isinstance(
+                        declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
+                    )
+                ):
+                    stream_state = state_manager.get_stream_state(
+                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
+                    )
+                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
+
+                    perpartition_cursor = (
+                        self._constructor.create_concurrent_cursor_from_perpartition_cursor(
+                            state_manager=state_manager,
+                            model_type=DatetimeBasedCursorModel,
+                            component_definition=incremental_sync_component_definition,
+                            stream_name=declarative_stream.name,
+                            stream_namespace=declarative_stream.namespace,
+                            config=config or {},
+                            stream_state=stream_state,
+                            partition_router=partition_router,
+                        )
+                    )
+
+                    retriever = self._get_retriever(declarative_stream, stream_state)
+
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            retriever,
+                            self.message_repository,
+                        ),
+                        perpartition_cursor,
+                    )
+
+                    concurrent_streams.append(
+                        DefaultStream(
+                            partition_generator=partition_generator,
+                            name=declarative_stream.name,
+                            json_schema=declarative_stream.get_json_schema(),
+                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
+                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
+                            cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
+                            logger=self.logger,
+                            cursor=perpartition_cursor,
+                        )
+                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:
@@ -395,6 +438,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 return False
         return True
 
+    def _get_retriever(
+        self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
+    ) -> Retriever:
+        retriever = declarative_stream.retriever
+
+        # This is an optimization so that we don't invoke any cursor or state management flows within the
+        # low-code framework because state management is handled through the ConcurrentCursor.
+        if declarative_stream and isinstance(retriever, SimpleRetriever):
+            # Also a temporary hack. In the legacy Stream implementation, as part of the read,
+            # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
+            # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
+            # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
+            # still rely on a DatetimeBasedCursor that is properly initialized with state.
+            if retriever.cursor:
+                retriever.cursor.set_initial_state(stream_state=stream_state)
+            # We zero it out here, but since this is a cursor reference, the state is still properly
+            # instantiated for the other components that reference it
+            retriever.cursor = None
+
+        return retriever
+
     @staticmethod
     def _select_streams(
         streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
```
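The `_get_retriever` helper above is where the concurrent source detaches the legacy low-code cursor: the cursor is first seeded with the incoming state, then the retriever's reference to it is cleared, while components that captured their own reference (for example `ClientSideIncrementalRecordFilterDecorator`) keep seeing the initialized state. A minimal sketch of that aliasing behavior, using hypothetical stand-in classes rather than the real CDK types:

```python
# Hypothetical stand-ins; the real types are SimpleRetriever and the low-code cursors.
class LegacyCursorStub:
    def __init__(self) -> None:
        self.state: dict = {}

    def set_initial_state(self, stream_state: dict) -> None:
        self.state = dict(stream_state)


class SimpleRetrieverStub:
    def __init__(self, cursor: LegacyCursorStub) -> None:
        self.cursor = cursor


class RecordFilterStub:
    def __init__(self, cursor: LegacyCursorStub) -> None:
        self._cursor = cursor  # keeps its own reference to the same cursor object


cursor = LegacyCursorStub()
retriever = SimpleRetrieverStub(cursor)
record_filter = RecordFilterStub(cursor)

# Mirror _get_retriever: seed the cursor with state, then detach it from the retriever.
cursor.set_initial_state({"updated_at": "2024-01-01"})
retriever.cursor = None

# The filter still observes the initialized state through its own reference.
assert record_filter._cursor.state == {"updated_at": "2024-01-01"}
```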
airbyte_cdk/sources/declarative/extractors/record_filter.py:

```diff
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
 
     def __init__(
         self,
-        date_time_based_cursor: DatetimeBasedCursor,
-        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
+        cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self._date_time_based_cursor = date_time_based_cursor
-        self._substream_cursor = substream_cursor
+        self._cursor = cursor
 
     def filter_records(
         self,
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
         records = (
             record
             for record in records
-            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
+            if self._cursor.should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty cause it is not used durig the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")
```
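The decorator now takes a single `cursor` argument instead of the previous `date_time_based_cursor`/`substream_cursor` pair, and defers the keep-or-drop decision entirely to `cursor.should_be_synced(...)`. A rough sketch of that filtering contract, with a hypothetical minimal cursor standing in for the CDK cursor classes:

```python
from dataclasses import dataclass
from typing import Any, Iterable, Mapping


@dataclass
class FakeRecord:
    data: Mapping[str, Any]


class MinimalCursor:
    """Hypothetical cursor: keeps records at or after a cutoff value."""

    def __init__(self, cursor_field: str, cutoff: str) -> None:
        self._cursor_field = cursor_field
        self._cutoff = cutoff

    def should_be_synced(self, record: FakeRecord) -> bool:
        return record.data.get(self._cursor_field, "") >= self._cutoff


def filter_records(
    records: Iterable[Mapping[str, Any]], cursor: MinimalCursor
) -> Iterable[Mapping[str, Any]]:
    # Mirrors the decorator: wrap each raw mapping in a record and let the cursor decide.
    return (r for r in records if cursor.should_be_synced(FakeRecord(data=r)))


cursor = MinimalCursor("updated_at", "2023-01-01")
rows = [{"updated_at": "2022-12-31"}, {"updated_at": "2023-06-01"}]
assert list(filter_records(rows, cursor)) == [{"updated_at": "2023-06-01"}]
```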
airbyte_cdk/sources/declarative/incremental/__init__.py:

```diff
@@ -2,6 +2,10 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
+)
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -21,6 +25,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
 
 __all__ = [
     "CursorFactory",
+    "ConcurrentCursorFactory",
+    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",
```
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py (new file):

```diff
@@ -0,0 +1,333 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import copy
+import logging
+import threading
+from collections import OrderedDict
+from copy import deepcopy
+from datetime import timedelta
+from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+
+from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
+from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
+    Timer,
+    iterate_with_last_flag_and_state,
+)
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+    PerPartitionKeySerializer,
+)
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
+
+logger = logging.getLogger("airbyte")
+
+
+class ConcurrentCursorFactory:
+    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
+        self._create_function = create_function
+
+    def create(
+        self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta]
+    ) -> ConcurrentCursor:
+        return self._create_function(
+            stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
+        )
+
+
+class ConcurrentPerPartitionCursor(Cursor):
+    """
+    Manages state per partition when a stream has many partitions, preventing data loss or duplication.
+
+    Attributes:
+        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
+
+    - **Partition Limitation Logic**
+      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.
+
+    - **Global Cursor Fallback**
+      New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.
+
+    CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
+    """
+
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+    _NO_STATE: Mapping[str, Any] = {}
+    _NO_CURSOR_STATE: Mapping[str, Any] = {}
+    _GLOBAL_STATE_KEY = "state"
+    _PERPARTITION_STATE_KEY = "states"
+    _KEY = 0
+    _VALUE = 1
+
+    def __init__(
+        self,
+        cursor_factory: ConcurrentCursorFactory,
+        partition_router: PartitionRouter,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        stream_state: Any,
+        message_repository: MessageRepository,
+        connector_state_manager: ConnectorStateManager,
+        cursor_field: CursorField,
+    ) -> None:
+        self._global_cursor: Optional[StreamState] = {}
+        self._stream_name = stream_name
+        self._stream_namespace = stream_namespace
+        self._message_repository = message_repository
+        self._connector_state_manager = connector_state_manager
+        self._cursor_field = cursor_field
+
+        self._cursor_factory = cursor_factory
+        self._partition_router = partition_router
+
+        # The dict is ordered to ensure that once the maximum number of partitions is reached,
+        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
+        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
+        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
+        self._finished_partitions: set[str] = set()
+        self._lock = threading.Lock()
+        self._timer = Timer()
+        self._new_global_cursor: Optional[StreamState] = None
+        self._lookback_window: int = 0
+        self._parent_state: Optional[StreamState] = None
+        self._over_limit: int = 0
+        self._partition_serializer = PerPartitionKeySerializer()
+
+        self._set_initial_state(stream_state)
+
+    @property
+    def cursor_field(self) -> CursorField:
+        return self._cursor_field
+
+    @property
+    def state(self) -> MutableMapping[str, Any]:
+        states = []
+        for partition_tuple, cursor in self._cursor_per_partition.items():
+            if cursor.state:
+                states.append(
+                    {
+                        "partition": self._to_dict(partition_tuple),
+                        "cursor": copy.deepcopy(cursor.state),
+                    }
+                )
+        state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
+
+        if self._global_cursor:
+            state[self._GLOBAL_STATE_KEY] = self._global_cursor
+        if self._lookback_window is not None:
+            state["lookback_window"] = self._lookback_window
+        if self._parent_state is not None:
+            state["parent_state"] = self._parent_state
+        return state
+
+    def close_partition(self, partition: Partition) -> None:
+        # Attempt to retrieve the stream slice
+        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]
+
+        # Ensure stream_slice is not None
+        if stream_slice is None:
+            raise ValueError("stream_slice cannot be None")
+
+        partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
+        with self._lock:
+            self._semaphore_per_partition[partition_key].acquire()
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
+                if (
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
+                ):
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+
+    def ensure_at_least_one_state_emitted(self) -> None:
+        """
+        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+        called.
+        """
+        if not any(
+            semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
+        ):
+            self._global_cursor = self._new_global_cursor
+            self._lookback_window = self._timer.finish()
+            self._parent_state = self._partition_router.get_stream_state()
+        self._emit_state_message()
+
+    def _emit_state_message(self) -> None:
+        self._connector_state_manager.update_state_for_stream(
+            self._stream_name,
+            self._stream_namespace,
+            self.state,
+        )
+        state_message = self._connector_state_manager.create_state_message(
+            self._stream_name, self._stream_namespace
+        )
+        self._message_repository.emit_message(state_message)
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        if self._timer.is_running():
+            raise RuntimeError("stream_slices has been executed more than once.")
+
+        slices = self._partition_router.stream_slices()
+        self._timer.start()
+        for partition in slices:
+            yield from self._generate_slices_from_partition(partition)
+
+    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+        # Ensure the maximum number of partitions is not exceeded
+        self._ensure_partition_limit()
+
+        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
+        if not cursor:
+            cursor = self._create_cursor(
+                self._global_cursor,
+                self._lookback_window if self._global_cursor else 0,
+            )
+            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                threading.Semaphore(0)
+            )
+
+        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
+            cursor.stream_slices(),
+            lambda: None,
+        ):
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
+            if is_last_slice:
+                self._finished_partitions.add(self._to_partition_key(partition.partition))
+            yield StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
+
+    def _ensure_partition_limit(self) -> None:
+        """
+        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+        """
+        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+            self._over_limit += 1
+            oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                0
+            ]  # Remove the oldest partition
+            logger.warning(
+                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+            )
+
+    def _set_initial_state(self, stream_state: StreamState) -> None:
+        """
+        Initialize the cursor's state using the provided `stream_state`.
+
+        This method supports global and per-partition state initialization.
+
+        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
+          The `global state` holds a single cursor position representing the latest processed record across all partitions.
+
+        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
+          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
+
+        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
+
+        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.
+
+        Args:
+            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
+                {
+                    "states": [
+                        {
+                            "partition": {
+                                "partition_key": "value"
+                            },
+                            "cursor": {
+                                "last_updated": "2023-05-27T00:00:00Z"
+                            }
+                        }
+                    ],
+                    "state": {
+                        "last_updated": "2023-05-27T00:00:00Z"
+                    },
+                    lookback_window: 10,
+                    "parent_state": {
+                        "parent_stream_name": {
+                            "last_updated": "2023-05-27T00:00:00Z"
+                        }
+                    }
+                }
+        """
+        if not stream_state:
+            return
+
+        if self._PERPARTITION_STATE_KEY not in stream_state:
+            # We assume that `stream_state` is in a global format that can be applied to all partitions.
+            # Example: {"global_state_format_key": "global_state_format_value"}
+            self._global_cursor = deepcopy(stream_state)
+            self._new_global_cursor = deepcopy(stream_state)
+
+        else:
+            self._lookback_window = int(stream_state.get("lookback_window", 0))
+
+            for state in stream_state[self._PERPARTITION_STATE_KEY]:
+                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                    self._create_cursor(state["cursor"])
+                )
+                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                    threading.Semaphore(0)
+                )
+
+            # set default state for missing partitions if it is per partition with fallback to global
+            if self._GLOBAL_STATE_KEY in stream_state:
+                self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+                self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+
+        # Set initial parent state
+        if stream_state.get("parent_state"):
+            self._parent_state = stream_state["parent_state"]
+
+        # Set parent state for partition routers based on parent streams
+        self._partition_router.set_initial_state(stream_state)
+
+    def observe(self, record: Record) -> None:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)
+
+    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
+        return self._partition_serializer.to_partition_key(partition)
+
+    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
+        return self._partition_serializer.to_partition(partition_key)
+
+    def _create_cursor(
+        self, cursor_state: Any, runtime_lookback_window: int = 0
+    ) -> ConcurrentCursor:
+        cursor = self._cursor_factory.create(
+            stream_state=deepcopy(cursor_state),
+            runtime_lookback_window=timedelta(seconds=runtime_lookback_window),
+        )
+        return cursor
+
+    def should_be_synced(self, record: Record) -> bool:
+        return self._get_cursor(record).should_be_synced(record)
+
+    def _get_cursor(self, record: Record) -> ConcurrentCursor:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        partition_key = self._to_partition_key(record.associated_slice.partition)
+        if partition_key not in self._cursor_per_partition:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        cursor = self._cursor_per_partition[partition_key]
+        return cursor
```
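For reference, the composite state this cursor emits combines the per-partition cursors under `states`, a global fallback cursor under `state`, the `lookback_window`, and any `parent_state`, as documented in `_set_initial_state` above. A representative value (the field values are illustrative, not from a real sync):

```python
# Illustrative composite state; keys match _PERPARTITION_STATE_KEY / _GLOBAL_STATE_KEY above.
composite_state = {
    "states": [
        {
            "partition": {"partition_key": "value"},
            "cursor": {"last_updated": "2023-05-27T00:00:00Z"},
        }
    ],
    "state": {"last_updated": "2023-05-27T00:00:00Z"},
    "lookback_window": 10,
    "parent_state": {
        "parent_stream_name": {"last_updated": "2023-05-27T00:00:00Z"}
    },
}
```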
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py:

```diff
@@ -303,6 +303,21 @@ class PerPartitionCursor(DeclarativeCursor):
         raise ValueError("A partition needs to be provided in order to get request body json")
 
     def should_be_synced(self, record: Record) -> bool:
+        if (
+            record.associated_slice
+            and self._to_partition_key(record.associated_slice.partition)
+            not in self._cursor_per_partition
+        ):
+            partition_state = (
+                self._state_to_migrate_from
+                if self._state_to_migrate_from
+                else self._NO_CURSOR_STATE
+            )
+            cursor = self._create_cursor(partition_state)
+
+            self._cursor_per_partition[
+                self._to_partition_key(record.associated_slice.partition)
+            ] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )
```
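`should_be_synced` now lazily registers a cursor for a partition it has not seen before, seeding it from the migrated state when one exists. The pattern is essentially a dict-backed get-or-create; a minimal sketch, with a hypothetical `make_cursor` factory standing in for `_create_cursor`:

```python
from typing import Any, Callable, Dict


def get_or_create_cursor(
    cursors: Dict[str, Any],
    partition_key: str,
    migrated_state: Dict[str, Any],
    make_cursor: Callable[[Dict[str, Any]], Any],
) -> Any:
    # Mirror the diff: fall back to the state being migrated from, else an empty state.
    if partition_key not in cursors:
        seed_state = migrated_state if migrated_state else {}
        cursors[partition_key] = make_cursor(seed_state)
    return cursors[partition_key]


cursors: Dict[str, Any] = {}
cursor = get_or_create_cursor(cursors, '{"id": 1}', {"updated_at": "2023-01-01"}, dict)
assert cursor == {"updated_at": "2023-01-01"} and '{"id": 1}' in cursors
```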
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py:

```diff
@@ -88,6 +88,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -461,6 +463,7 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
+    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -917,6 +920,8 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
+        message_repository: Optional[MessageRepository] = None,
+        runtime_lookback_window: Optional[datetime.timedelta] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -978,10 +983,22 @@ class ModelToComponentFactory:
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
             input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
-            is_sequential_state=True,
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
             cursor_granularity=cursor_granularity,
         )
 
+        # Adjusts the stream state by applying the runtime lookback window.
+        # This is used to ensure correct state handling in case of failed partitions.
+        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
+        if runtime_lookback_window and stream_state_value:
+            new_stream_state = (
+                connector_state_converter.parse_timestamp(stream_state_value)
+                - runtime_lookback_window
+            )
+            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
+                new_stream_state
+            )
+
         start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
         if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
             start_date_runtime_value = self.create_min_max_datetime(
```
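The runtime lookback adjustment simply rewinds the saved cursor value by a `timedelta` before the cursor is built, so a retried partition re-reads a safety margin of history. Roughly (assuming an ISO-8601 cursor format; in the factory, the state converter handles parsing and formatting):

```python
from datetime import datetime, timedelta

stream_state = {"updated_at": "2024-03-10T12:00:00"}
runtime_lookback_window = timedelta(hours=1)

# Rewind the stored cursor value by the lookback window, then re-serialize it.
parsed = datetime.fromisoformat(stream_state["updated_at"])
stream_state["updated_at"] = (parsed - runtime_lookback_window).isoformat()

assert stream_state["updated_at"] == "2024-03-10T11:00:00"
```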
```diff
@@ -1052,7 +1069,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=message_repository or self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1064,6 +1081,63 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router: PartitionRouter,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+                message_repository=NoopMessageRepository(),
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
+        )
+
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
```
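The factory binds everything except the per-partition pieces with `functools.partial`, so each call to `ConcurrentCursorFactory.create` only has to supply `stream_state` and `runtime_lookback_window`. A reduced sketch of that wiring, with a hypothetical `build_cursor` in place of `create_concurrent_cursor_from_datetime_based_cursor`:

```python
from datetime import timedelta
from functools import partial
from typing import Any, Callable, Mapping, Optional


def build_cursor(
    stream_name: str,
    config: Mapping[str, Any],
    stream_state: Mapping[str, Any],
    runtime_lookback_window: Optional[timedelta],
) -> dict:
    # Hypothetical stand-in: the real factory returns a ConcurrentCursor.
    return {
        "stream": stream_name,
        "state": dict(stream_state),
        "lookback": runtime_lookback_window,
    }


class ConcurrentCursorFactorySketch:
    def __init__(self, create_function: Callable[..., dict]) -> None:
        self._create_function = create_function

    def create(
        self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta]
    ) -> dict:
        # Only the per-partition arguments vary; everything else was bound up front.
        return self._create_function(
            stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
        )


factory = ConcurrentCursorFactorySketch(partial(build_cursor, stream_name="events", config={}))
cursor = factory.create({"updated_at": "2024-01-01"}, timedelta(minutes=5))
assert cursor["stream"] == "events" and cursor["lookback"] == timedelta(minutes=5)
```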
```diff
@@ -1369,18 +1443,15 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-            client_side_incremental_sync = {
-                "date_time_based_cursor": self._create_component_from_model(
-                    model=model.incremental_sync, config=config
-                ),
-                "substream_cursor": (
-                    combined_slicers
-                    if isinstance(
-                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-                    )
-                    else None
-                ),
-            }
+            cursor = (
+                combined_slicers
+                if isinstance(
+                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                )
+                else self._create_component_from_model(model=model.incremental_sync, config=config)
+            )
+
+            client_side_incremental_sync = {"cursor": cursor}
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
@@ -2227,7 +2298,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ):
+        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
```
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py:

```diff
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.stream_slicer.get_request_headers,
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
```
airbyte_cdk/sources/streams/concurrent/cursor.py:

```diff
@@ -196,7 +196,9 @@ class ConcurrentCursor(Cursor):
 
     @property
     def state(self) -> MutableMapping[str, Any]:
-        return self._concurrent_state
+        return self._connector_state_converter.convert_to_state_message(
+            self.cursor_field, self._concurrent_state
+        )
 
     @property
     def cursor_field(self) -> CursorField:
@@ -241,10 +243,10 @@ class ConcurrentCursor(Cursor):
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
     def close_partition(self, partition: Partition) -> None:
-        slice_count_before = len(self.state.get("slices", []))
+        slice_count_before = len(self._concurrent_state.get("slices", []))
         self._add_slice_to_state(partition)
         if slice_count_before < len(
-            self.state["slices"]
+            self._concurrent_state["slices"]
         ):  # only emit if at least one slice has been processed
             self._merge_partitions()
             self._emit_state_message()
@@ -256,11 +258,11 @@ class ConcurrentCursor(Cursor):
         )
 
         if self._slice_boundary_fields:
-            if "slices" not in self.state:
+            if "slices" not in self._concurrent_state:
                 raise RuntimeError(
                     f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                 )
-            self.state["slices"].append(
+            self._concurrent_state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self._extract_from_slice(
                         partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -288,7 +290,7 @@ class ConcurrentCursor(Cursor):
                 "expected. Please contact the Airbyte team."
             )
 
-        self.state["slices"].append(
+        self._concurrent_state["slices"].append(
             {
                 self._connector_state_converter.START_KEY: self.start,
                 self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -300,9 +302,7 @@ class ConcurrentCursor(Cursor):
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
-            self._connector_state_converter.convert_to_state_message(
-                self._cursor_field, self.state
-            ),
+            self.state,
         )
         state_message = self._connector_state_manager.create_state_message(
             self._stream_name, self._stream_namespace
@@ -310,7 +310,9 @@ class ConcurrentCursor(Cursor):
         self._message_repository.emit_message(state_message)
 
     def _merge_partitions(self) -> None:
-        self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
+        self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
+            self._concurrent_state["slices"]
+        )
 
     def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
         try:
@@ -347,36 +349,42 @@ class ConcurrentCursor(Cursor):
         if self._start is not None and self._is_start_before_first_slice():
             yield from self._split_per_slice_range(
                 self._start,
-                self.state["slices"][0][self._connector_state_converter.START_KEY],
+                self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
                 False,
             )
 
-        if len(self.state["slices"]) == 1:
+        if len(self._concurrent_state["slices"]) == 1:
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self.state["slices"][0][self._connector_state_converter.END_KEY]
+                    self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
             )
-        elif len(self.state["slices"]) > 1:
-            for i in range(len(self.state["slices"]) - 1):
+        elif len(self._concurrent_state["slices"]) > 1:
+            for i in range(len(self._concurrent_state["slices"]) - 1):
                 if self._cursor_granularity:
                     yield from self._split_per_slice_range(
-                        self.state["slices"][i][self._connector_state_converter.END_KEY]
+                        self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
                         + self._cursor_granularity,
-                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
+                        self._concurrent_state["slices"][i + 1][
+                            self._connector_state_converter.START_KEY
+                        ],
                         False,
                     )
                 else:
                     yield from self._split_per_slice_range(
-                        self.state["slices"][i][self._connector_state_converter.END_KEY],
-                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
+                        self._concurrent_state["slices"][i][
+                            self._connector_state_converter.END_KEY
+                        ],
+                        self._concurrent_state["slices"][i + 1][
+                            self._connector_state_converter.START_KEY
+                        ],
                         False,
                     )
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self.state["slices"][-1][self._connector_state_converter.END_KEY]
+                    self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
@@ -387,7 +395,8 @@ class ConcurrentCursor(Cursor):
     def _is_start_before_first_slice(self) -> bool:
         return (
             self._start is not None
-            and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
+            and self._start
+            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
         )
 
     def _calculate_lower_boundary_of_last_slice(
```
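After the rename to `_concurrent_state`, `stream_slices` still implements the same interval logic: emit a slice from the configured start to the first recorded slice, the gaps between merged slices (offset by the cursor granularity when one is configured), and a final slice from the last recorded end (subject to the lookback boundary) to now. A simplified integer analogue of that gap walk:

```python
from typing import Iterable, List, Mapping, Tuple


def generate_gaps(
    slices: List[Mapping[str, int]], start: int, end: int, granularity: int = 1
) -> Iterable[Tuple[int, int]]:
    # Simplified analogue of ConcurrentCursor.stream_slices over merged state slices.
    if start < slices[0]["start"]:
        yield (start, slices[0]["start"])  # catch-up range before the first recorded slice
    for left, right in zip(slices, slices[1:]):
        yield (left["end"] + granularity, right["start"])  # gap between two recorded slices
    yield (slices[-1]["end"], end)  # from the most recent cursor value up to "now"


merged = [{"start": 0, "end": 10}, {"start": 20, "end": 30}]
assert list(generate_gaps(merged, start=-5, end=40)) == [(-5, 0), (11, 20), (30, 40)]
```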
{airbyte_cdk-6.23.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/RECORD:

```diff
@@ -63,7 +63,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=wbfk5uduLnEgdwJrKxKvK7TpGGIpsOxMGi1lOniipLA,25577
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -83,15 +83,16 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrq
 airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
 airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
 airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
-airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=
+airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
 airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
 airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
-airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=
+airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
+airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=K7201hcQbogtWZCy85gmluWB564_8sg_8MvrAfoy7MA,14466
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
-airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=
-airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=
+airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
+airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=_FSJjAwL4Zu-i2CngnhTtx8j-NPVSBKj5LwDSPta3Cg,16305
 airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
 airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
 airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -114,7 +115,7 @@ airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_Z
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=nVso02_fdZXK6bl-8u9LazoIsfkDuFe6mQTjkAWkH3s,117764
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -166,7 +167,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=sa99VqU1U45fgZL2qEdw8ueX1tPTPfGxibQ-ZFePjSM,9361
@@ -261,7 +262,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
 airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
 airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
 airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
-airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=
+airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=suObbNi24so8Wcj0Wm32OkJAcuvODAOwp373YBmUPp0,21213
 airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
 airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
 airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -347,8 +348,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.23.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.23.0.dist-info/METADATA,sha256=
-airbyte_cdk-6.23.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-airbyte_cdk-6.23.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.23.0.dist-info/RECORD,,
+airbyte_cdk-6.23.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.23.0.dev0.dist-info/METADATA,sha256=Dv07oYfuxMAjky8DRjj5e5a6TWDJK-SXDIAIQk86X3k,6001
+airbyte_cdk-6.23.0.dev0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+airbyte_cdk-6.23.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.23.0.dev0.dist-info/RECORD,,
```