airbyte-cdk 6.22.0__py3-none-any.whl → 6.23.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +6 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +81 -16
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +333 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -4
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +120 -21
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- {airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/RECORD +19 -17
- {airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/entry_points.txt +0 -0
--- a/airbyte_cdk/cli/source_declarative_manifest/_run.py
+++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py
@@ -171,6 +171,12 @@ def create_declarative_source(
             "Invalid config: `__injected_declarative_manifest` should be provided at the root "
             f"of the config but config only has keys: {list(config.keys() if config else [])}"
         )
+    if not isinstance(config["__injected_declarative_manifest"], dict):
+        raise ValueError(
+            "Invalid config: `__injected_declarative_manifest` should be a dictionary, "
+            f"but got type: {type(config['__injected_declarative_manifest'])}"
+        )
+
     return ConcurrentDeclarativeSource(
         config=config,
         catalog=catalog,
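Illustrative sketch (not part of the published diff): with the new guard above, `create_declarative_source` now rejects a manifest that arrives as serialized JSON; the injected manifest must be a dict. Variable names below are hypothetical.

```python
import json

manifest = {"version": "0.1.0", "type": "DeclarativeSource"}

# OK after this change: the injected manifest is a dict.
good_config = {"__injected_declarative_manifest": manifest}

# Would now raise ValueError inside create_declarative_source: the manifest is a str.
bad_config = {"__injected_declarative_manifest": json.dumps(manifest)}

# Caller-side fix: deserialize before injecting.
bad_config["__injected_declarative_manifest"] = json.loads(
    bad_config["__injected_declarative_manifest"]
)
```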
--- a/airbyte_cdk/connector_builder/connector_builder_handler.py
+++ b/airbyte_cdk/connector_builder/connector_builder_handler.py
@@ -52,6 +52,7 @@ def get_limits(config: Mapping[str, Any]) -> TestReadLimits:
 def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> ManifestDeclarativeSource:
     manifest = config["__injected_declarative_manifest"]
     return ManifestDeclarativeSource(
+        config=config,
         emit_connector_builder_messages=True,
         source_config=manifest,
         component_factory=ModelToComponentFactory(
--- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -20,6 +20,9 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
+from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
+    PerPartitionWithGlobalCursor,
+)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -32,7 +35,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -77,6 +80,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
 
         super().__init__(
             source_config=source_config,
+            config=config,
             debug=debug,
             emit_connector_builder_messages=emit_connector_builder_messages,
             component_factory=component_factory,
@@ -230,21 +234,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         stream_state=stream_state,
                     )
 
-                    retriever = declarative_stream.retriever
-
-                    # This is an optimization so that we don't invoke any cursor or state management flows within the
-                    # low-code framework because state management is handled through the ConcurrentCursor.
-                    if declarative_stream and isinstance(retriever, SimpleRetriever):
-                        # Also a temporary hack. In the legacy Stream implementation, as part of the read,
-                        # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
-                        # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-                        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-                        # still rely on a DatetimeBasedCursor that is properly initialized with state.
-                        if retriever.cursor:
-                            retriever.cursor.set_initial_state(stream_state=stream_state)
-                        # We zero it out here, but since this is a cursor reference, the state is still properly
-                        # instantiated for the other components that reference it
-                        retriever.cursor = None
+                    retriever = self._get_retriever(declarative_stream, stream_state)
 
                     partition_generator = StreamSlicerPartitionGenerator(
                         DeclarativePartitionFactory(
@@ -304,6 +294,60 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             cursor=final_state_cursor,
                         )
                     )
+                elif (
+                    incremental_sync_component_definition
+                    and incremental_sync_component_definition.get("type", "")
+                    == DatetimeBasedCursorModel.__name__
+                    and self._stream_supports_concurrent_partition_processing(
+                        declarative_stream=declarative_stream
+                    )
+                    and hasattr(declarative_stream.retriever, "stream_slicer")
+                    and isinstance(
+                        declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
+                    )
+                ):
+                    stream_state = state_manager.get_stream_state(
+                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
+                    )
+                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
+
+                    perpartition_cursor = (
+                        self._constructor.create_concurrent_cursor_from_perpartition_cursor(
+                            state_manager=state_manager,
+                            model_type=DatetimeBasedCursorModel,
+                            component_definition=incremental_sync_component_definition,
+                            stream_name=declarative_stream.name,
+                            stream_namespace=declarative_stream.namespace,
+                            config=config or {},
+                            stream_state=stream_state,
+                            partition_router=partition_router,
+                        )
+                    )
+
+                    retriever = self._get_retriever(declarative_stream, stream_state)
+
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            retriever,
+                            self.message_repository,
+                        ),
+                        perpartition_cursor,
+                    )
+
+                    concurrent_streams.append(
+                        DefaultStream(
+                            partition_generator=partition_generator,
+                            name=declarative_stream.name,
+                            json_schema=declarative_stream.get_json_schema(),
+                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
+                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
+                            cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
+                            logger=self.logger,
+                            cursor=perpartition_cursor,
+                        )
+                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:
@@ -394,6 +438,27 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     return False
         return True
 
+    def _get_retriever(
+        self, declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
+    ) -> Retriever:
+        retriever = declarative_stream.retriever
+
+        # This is an optimization so that we don't invoke any cursor or state management flows within the
+        # low-code framework because state management is handled through the ConcurrentCursor.
+        if declarative_stream and isinstance(retriever, SimpleRetriever):
+            # Also a temporary hack. In the legacy Stream implementation, as part of the read,
+            # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
+            # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
+            # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
+            # still rely on a DatetimeBasedCursor that is properly initialized with state.
+            if retriever.cursor:
+                retriever.cursor.set_initial_state(stream_state=stream_state)
+            # We zero it out here, but since this is a cursor reference, the state is still properly
+            # instantiated for the other components that reference it
+            retriever.cursor = None
+
+        return retriever
+
     @staticmethod
     def _select_streams(
         streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
--- a/airbyte_cdk/sources/declarative/extractors/record_filter.py
+++ b/airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
 
     def __init__(
         self,
-        date_time_based_cursor: DatetimeBasedCursor,
-        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
+        cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self._date_time_based_cursor = date_time_based_cursor
-        self._substream_cursor = substream_cursor
+        self._cursor = cursor
 
     def filter_records(
         self,
@@ -77,7 +75,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
         records = (
             record
             for record in records
-            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
+            if self._cursor.should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty cause it is not used durig the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")
--- a/airbyte_cdk/sources/declarative/incremental/__init__.py
+++ b/airbyte_cdk/sources/declarative/incremental/__init__.py
@@ -2,6 +2,10 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
+)
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -21,6 +25,8 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
 
 __all__ = [
     "CursorFactory",
+    "ConcurrentCursorFactory",
+    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",
--- /dev/null
+++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py
@@ -0,0 +1,333 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import copy
+import logging
+import threading
+from collections import OrderedDict
+from copy import deepcopy
+from datetime import timedelta
+from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+
+from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
+from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
+    Timer,
+    iterate_with_last_flag_and_state,
+)
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+    PerPartitionKeySerializer,
+)
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
+
+logger = logging.getLogger("airbyte")
+
+
+class ConcurrentCursorFactory:
+    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
+        self._create_function = create_function
+
+    def create(
+        self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta]
+    ) -> ConcurrentCursor:
+        return self._create_function(
+            stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
+        )
+
+
+class ConcurrentPerPartitionCursor(Cursor):
+    """
+    Manages state per partition when a stream has many partitions, preventing data loss or duplication.
+
+    Attributes:
+        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
+
+    - **Partition Limitation Logic**
+      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.
+
+    - **Global Cursor Fallback**
+      New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.
+
+    CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
+    """
+
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+    _NO_STATE: Mapping[str, Any] = {}
+    _NO_CURSOR_STATE: Mapping[str, Any] = {}
+    _GLOBAL_STATE_KEY = "state"
+    _PERPARTITION_STATE_KEY = "states"
+    _KEY = 0
+    _VALUE = 1
+
+    def __init__(
+        self,
+        cursor_factory: ConcurrentCursorFactory,
+        partition_router: PartitionRouter,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        stream_state: Any,
+        message_repository: MessageRepository,
+        connector_state_manager: ConnectorStateManager,
+        cursor_field: CursorField,
+    ) -> None:
+        self._global_cursor: Optional[StreamState] = {}
+        self._stream_name = stream_name
+        self._stream_namespace = stream_namespace
+        self._message_repository = message_repository
+        self._connector_state_manager = connector_state_manager
+        self._cursor_field = cursor_field
+
+        self._cursor_factory = cursor_factory
+        self._partition_router = partition_router
+
+        # The dict is ordered to ensure that once the maximum number of partitions is reached,
+        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
+        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
+        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
+        self._finished_partitions: set[str] = set()
+        self._lock = threading.Lock()
+        self._timer = Timer()
+        self._new_global_cursor: Optional[StreamState] = None
+        self._lookback_window: int = 0
+        self._parent_state: Optional[StreamState] = None
+        self._over_limit: int = 0
+        self._partition_serializer = PerPartitionKeySerializer()
+
+        self._set_initial_state(stream_state)
+
+    @property
+    def cursor_field(self) -> CursorField:
+        return self._cursor_field
+
+    @property
+    def state(self) -> MutableMapping[str, Any]:
+        states = []
+        for partition_tuple, cursor in self._cursor_per_partition.items():
+            if cursor.state:
+                states.append(
+                    {
+                        "partition": self._to_dict(partition_tuple),
+                        "cursor": copy.deepcopy(cursor.state),
+                    }
+                )
+        state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
+
+        if self._global_cursor:
+            state[self._GLOBAL_STATE_KEY] = self._global_cursor
+        if self._lookback_window is not None:
+            state["lookback_window"] = self._lookback_window
+        if self._parent_state is not None:
+            state["parent_state"] = self._parent_state
+        return state
+
+    def close_partition(self, partition: Partition) -> None:
+        # Attempt to retrieve the stream slice
+        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]
+
+        # Ensure stream_slice is not None
+        if stream_slice is None:
+            raise ValueError("stream_slice cannot be None")
+
+        partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
+        with self._lock:
+            self._semaphore_per_partition[partition_key].acquire()
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
+                if (
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
+                ):
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+
+    def ensure_at_least_one_state_emitted(self) -> None:
+        """
+        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+        called.
+        """
+        if not any(
+            semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
+        ):
+            self._global_cursor = self._new_global_cursor
+            self._lookback_window = self._timer.finish()
+            self._parent_state = self._partition_router.get_stream_state()
+        self._emit_state_message()
+
+    def _emit_state_message(self) -> None:
+        self._connector_state_manager.update_state_for_stream(
+            self._stream_name,
+            self._stream_namespace,
+            self.state,
+        )
+        state_message = self._connector_state_manager.create_state_message(
+            self._stream_name, self._stream_namespace
+        )
+        self._message_repository.emit_message(state_message)
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        if self._timer.is_running():
+            raise RuntimeError("stream_slices has been executed more than once.")
+
+        slices = self._partition_router.stream_slices()
+        self._timer.start()
+        for partition in slices:
+            yield from self._generate_slices_from_partition(partition)
+
+    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+        # Ensure the maximum number of partitions is not exceeded
+        self._ensure_partition_limit()
+
+        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
+        if not cursor:
+            cursor = self._create_cursor(
+                self._global_cursor,
+                self._lookback_window if self._global_cursor else 0,
+            )
+            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                threading.Semaphore(0)
+            )
+
+        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
+            cursor.stream_slices(),
+            lambda: None,
+        ):
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
+            if is_last_slice:
+                self._finished_partitions.add(self._to_partition_key(partition.partition))
+            yield StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
+
+    def _ensure_partition_limit(self) -> None:
+        """
+        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+        """
+        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+            self._over_limit += 1
+            oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                0
+            ]  # Remove the oldest partition
+            logger.warning(
+                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+            )
+
+    def _set_initial_state(self, stream_state: StreamState) -> None:
+        """
+        Initialize the cursor's state using the provided `stream_state`.
+
+        This method supports global and per-partition state initialization.
+
+        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
+          The `global state` holds a single cursor position representing the latest processed record across all partitions.
+
+        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
+          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
+
+        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
+
+        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.
+
+        Args:
+            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
+                {
+                    "states": [
+                        {
+                            "partition": {
+                                "partition_key": "value"
+                            },
+                            "cursor": {
+                                "last_updated": "2023-05-27T00:00:00Z"
+                            }
+                        }
+                    ],
+                    "state": {
+                        "last_updated": "2023-05-27T00:00:00Z"
+                    },
+                    lookback_window: 10,
+                    "parent_state": {
+                        "parent_stream_name": {
+                            "last_updated": "2023-05-27T00:00:00Z"
+                        }
+                    }
+                }
+        """
+        if not stream_state:
+            return
+
+        if self._PERPARTITION_STATE_KEY not in stream_state:
+            # We assume that `stream_state` is in a global format that can be applied to all partitions.
+            # Example: {"global_state_format_key": "global_state_format_value"}
+            self._global_cursor = deepcopy(stream_state)
+            self._new_global_cursor = deepcopy(stream_state)
+
+        else:
+            self._lookback_window = int(stream_state.get("lookback_window", 0))
+
+            for state in stream_state[self._PERPARTITION_STATE_KEY]:
+                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                    self._create_cursor(state["cursor"])
+                )
+                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                    threading.Semaphore(0)
+                )
+
+            # set default state for missing partitions if it is per partition with fallback to global
+            if self._GLOBAL_STATE_KEY in stream_state:
+                self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+                self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+
+        # Set initial parent state
+        if stream_state.get("parent_state"):
+            self._parent_state = stream_state["parent_state"]
+
+        # Set parent state for partition routers based on parent streams
+        self._partition_router.set_initial_state(stream_state)
+
+    def observe(self, record: Record) -> None:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)
+
+    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
+        return self._partition_serializer.to_partition_key(partition)
+
+    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
+        return self._partition_serializer.to_partition(partition_key)
+
+    def _create_cursor(
+        self, cursor_state: Any, runtime_lookback_window: int = 0
+    ) -> ConcurrentCursor:
+        cursor = self._cursor_factory.create(
+            stream_state=deepcopy(cursor_state),
+            runtime_lookback_window=timedelta(seconds=runtime_lookback_window),
+        )
+        return cursor
+
+    def should_be_synced(self, record: Record) -> bool:
+        return self._get_cursor(record).should_be_synced(record)
+
+    def _get_cursor(self, record: Record) -> ConcurrentCursor:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        partition_key = self._to_partition_key(record.associated_slice.partition)
+        if partition_key not in self._cursor_per_partition:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        cursor = self._cursor_per_partition[partition_key]
+        return cursor
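For orientation (not part of the diff): the state object that `_set_initial_state` consumes and the `state` property emits follows the format documented in the docstring above; partition keys and cursor values here are example data.

```python
# Example ConcurrentPerPartitionCursor state, per the docstring above.
# "states" tracks one cursor per partition; "state" is the global cursor used
# as the initial state for partitions not listed; "lookback_window" (seconds)
# widens the next sync's window; "parent_state" seeds the partition router.
example_stream_state = {
    "states": [
        {
            "partition": {"partition_key": "value"},
            "cursor": {"last_updated": "2023-05-27T00:00:00Z"},
        }
    ],
    "state": {"last_updated": "2023-05-27T00:00:00Z"},
    "lookback_window": 10,
    "parent_state": {"parent_stream_name": {"last_updated": "2023-05-27T00:00:00Z"}},
}
```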
--- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
@@ -303,6 +303,21 @@ class PerPartitionCursor(DeclarativeCursor):
         raise ValueError("A partition needs to be provided in order to get request body json")
 
     def should_be_synced(self, record: Record) -> bool:
+        if (
+            record.associated_slice
+            and self._to_partition_key(record.associated_slice.partition)
+            not in self._cursor_per_partition
+        ):
+            partition_state = (
+                self._state_to_migrate_from
+                if self._state_to_migrate_from
+                else self._NO_CURSOR_STATE
+            )
+            cursor = self._create_cursor(partition_state)
+
+            self._cursor_per_partition[
+                self._to_partition_key(record.associated_slice.partition)
+            ] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )
--- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -7,6 +7,7 @@ import logging
 import pkgutil
 from copy import deepcopy
 from importlib import metadata
+from types import ModuleType
 from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
 
 import yaml
@@ -32,6 +33,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
     DeclarativeStream as DeclarativeStreamModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
+from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
+    get_registered_components_module,
+)
 from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
     ManifestComponentTransformer,
 )
@@ -59,22 +63,29 @@ class ManifestDeclarativeSource(DeclarativeSource):
     def __init__(
         self,
         source_config: ConnectionDefinition,
+        *,
+        config: Mapping[str, Any] | None = None,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
         component_factory: Optional[ModelToComponentFactory] = None,
     ):
         """
-        :param source_config: The manifest of low-code components that describe the source connector
-        :param debug: True if debug mode is enabled
-        :param component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked
+        Args:
+            config: The provided config dict.
+            source_config: The manifest of low-code components that describe the source connector.
+            debug: True if debug mode is enabled.
+            emit_connector_builder_messages: True if messages should be emitted to the connector builder.
+            component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
         """
         self.logger = logging.getLogger(f"airbyte.{self.name}")
-
         # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
         manifest = dict(source_config)
         if "type" not in manifest:
             manifest["type"] = "DeclarativeSource"
 
+        # If custom components are needed, locate and/or register them.
+        self.components_module: ModuleType | None = get_registered_components_module(config=config)
+
         resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
         propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
             "", resolved_source_config, {}
--- /dev/null
+++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py
@@ -0,0 +1,143 @@
+"""Contains functions to compile custom code from text."""
+
+import hashlib
+import os
+import sys
+from collections.abc import Mapping
+from types import ModuleType
+from typing import Any, cast
+
+from typing_extensions import Literal
+
+ChecksumType = Literal["md5", "sha256"]
+CHECKSUM_FUNCTIONS = {
+    "md5": hashlib.md5,
+    "sha256": hashlib.sha256,
+}
+COMPONENTS_MODULE_NAME = "components"
+SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
+INJECTED_MANIFEST = "__injected_declarative_manifest"
+INJECTED_COMPONENTS_PY = "__injected_components_py"
+INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
+ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
+
+
+class AirbyteCodeTamperedError(Exception):
+    """Raised when the connector's components module does not match its checksum.
+
+    This is a fatal error, as it can be a sign of code tampering.
+    """
+
+
+class AirbyteCustomCodeNotPermittedError(Exception):
+    """Raised when custom code is attempted to be run in an environment that does not support it."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            "Custom connector code is not permitted in this environment. "
+            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` "
+            "environment variable to 'true' in your Airbyte environment. "
+            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
+            "custom connector code."
+        )
+
+
+def _hash_text(input_text: str, hash_type: str = "md5") -> str:
+    """Return the hash of the input text using the specified hash type."""
+    if not input_text:
+        raise ValueError("Input text cannot be empty.")
+
+    hash_object = CHECKSUM_FUNCTIONS[hash_type]()
+    hash_object.update(input_text.encode())
+    return hash_object.hexdigest()
+
+
+def custom_code_execution_permitted() -> bool:
+    """Return `True` if custom code execution is permitted, otherwise `False`.
+
+    Custom code execution is permitted if the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable is set to 'true'.
+    """
+    return os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "").lower() == "true"
+
+
+def validate_python_code(
+    code_text: str,
+    checksums: dict[str, str] | None,
+) -> None:
+    """Validate the provided Python code text against the provided checksums.
+
+    Currently we fail if no checksums are provided, although this may change in the future.
+    """
+    if not checksums:
+        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
+
+    for checksum_type, checksum in checksums.items():
+        if checksum_type not in CHECKSUM_FUNCTIONS:
+            raise ValueError(
+                f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
+            )
+
+        if _hash_text(code_text, checksum_type) != checksum:
+            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
+
+
+def get_registered_components_module(
+    config: Mapping[str, Any] | None,
+) -> ModuleType | None:
+    """Get a components module object based on the provided config.
+
+    If custom python components is provided, this will be loaded. Otherwise, we will
+    attempt to load from the `components` module already imported/registered in sys.modules.
+
+    If custom `components.py` text is provided in config, it will be registered with sys.modules
+    so that it can be later imported by manifest declarations which reference the provided classes.
+
+    Returns `None` if no components is provided and the `components` module is not found.
+    """
+    if config and INJECTED_COMPONENTS_PY in config:
+        if not custom_code_execution_permitted():
+            raise AirbyteCustomCodeNotPermittedError
+
+        # Create a new module object and execute the provided Python code text within it
+        python_text: str = config[INJECTED_COMPONENTS_PY]
+        return register_components_module_from_string(
+            components_py_text=python_text,
+            checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
+        )
+
+    # Check for `components` or `source_declarative_manifest.components`.
+    if SDM_COMPONENTS_MODULE_NAME in sys.modules:
+        return cast(ModuleType, sys.modules.get(SDM_COMPONENTS_MODULE_NAME))
+
+    if COMPONENTS_MODULE_NAME in sys.modules:
+        return cast(ModuleType, sys.modules.get(COMPONENTS_MODULE_NAME))
+
+    # Could not find module 'components' in `sys.modules`
+    # and INJECTED_COMPONENTS_PY was not provided in config.
+    return None
+
+
+def register_components_module_from_string(
+    components_py_text: str,
+    checksums: dict[str, Any] | None,
+) -> ModuleType:
+    """Load and return the components module from a provided string containing the python code."""
+    # First validate the code
+    validate_python_code(
+        code_text=components_py_text,
+        checksums=checksums,
+    )
+
+    # Create a new module object
+    components_module = ModuleType(name=COMPONENTS_MODULE_NAME)
+
+    # Execute the module text in the module's namespace
+    exec(components_py_text, components_module.__dict__)
+
+    # Register the module in `sys.modules`` so it can be imported as
+    # `source_declarative_manifest.components` and/or `components`.
+    sys.modules[SDM_COMPONENTS_MODULE_NAME] = components_module
+    sys.modules[COMPONENTS_MODULE_NAME] = components_module
+
+    # Now you can import and use the module
+    return components_module
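Illustrative usage (not part of the diff): a minimal sketch of injecting custom components through config. The environment gate and config key names come from the module above; the components source text is a made-up one-liner.

```python
import hashlib
import os

from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    get_registered_components_module,
)

# Custom code must be explicitly allowed via the environment gate.
os.environ["AIRBYTE_ALLOW_CUSTOM_CODE"] = "true"

components_py = "MY_CONSTANT = 42\n"  # hypothetical components.py source text
config = {
    "__injected_components_py": components_py,
    # validate_python_code() requires at least one matching checksum.
    "__injected_components_py_checksums": {
        "md5": hashlib.md5(components_py.encode()).hexdigest(),
    },
}

module = get_registered_components_module(config=config)
assert module is not None and module.MY_CONSTANT == 42
# The module is now registered in sys.modules as both `components` and
# `source_declarative_manifest.components`.
```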
--- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -8,6 +8,7 @@ import datetime
 import importlib
 import inspect
 import re
+import sys
 from functools import partial
 from typing import (
     Any,
@@ -87,6 +88,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -363,6 +366,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ZipfileDecoder as ZipfileDecoderModel,
 )
+from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
+    COMPONENTS_MODULE_NAME,
+    SDM_COMPONENTS_MODULE_NAME,
+)
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
     ListPartitionRouter,
@@ -456,6 +463,7 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
+    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -912,6 +920,8 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
+        message_repository: Optional[MessageRepository] = None,
+        runtime_lookback_window: Optional[datetime.timedelta] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -973,10 +983,22 @@ class ModelToComponentFactory:
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
             input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
-            is_sequential_state=True,
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
             cursor_granularity=cursor_granularity,
         )
 
+        # Adjusts the stream state by applying the runtime lookback window.
+        # This is used to ensure correct state handling in case of failed partitions.
+        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
+        if runtime_lookback_window and stream_state_value:
+            new_stream_state = (
+                connector_state_converter.parse_timestamp(stream_state_value)
+                - runtime_lookback_window
+            )
+            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
+                new_stream_state
+            )
+
         start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
         if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
             start_date_runtime_value = self.create_min_max_datetime(
@@ -1047,7 +1069,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=message_repository or self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1059,6 +1081,63 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router: PartitionRouter,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+                message_repository=NoopMessageRepository(),
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
+        )
+
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1102,7 +1181,6 @@ class ModelToComponentFactory:
         :param config: The custom defined connector config
         :return: The declarative component built from the Pydantic model to be used at runtime
         """
-
         custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
         component_fields = get_type_hints(custom_component_class)
         model_args = model.dict()
@@ -1156,14 +1234,38 @@ class ModelToComponentFactory:
         return custom_component_class(**kwargs)
 
     @staticmethod
-    def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
+    def _get_class_from_fully_qualified_class_name(
+        full_qualified_class_name: str,
+    ) -> Any:
+        """Get a class from its fully qualified name.
+
+        If a custom components module is needed, we assume it is already registered - probably
+        as `source_declarative_manifest.components` or `components`.
+
+        Args:
+            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
+
+        Returns:
+            Any: The class object.
+
+        Raises:
+            ValueError: If the class cannot be loaded.
+        """
         split = full_qualified_class_name.split(".")
-        module = ".".join(split[:-1])
+        module_name_full = ".".join(split[:-1])
        class_name = split[-1]
+
+        try:
+            module_ref = importlib.import_module(module_name_full)
+        except ModuleNotFoundError as e:
+            raise ValueError(f"Could not load module `{module_name_full}`.") from e
+
         try:
-            return getattr(importlib.import_module(module), class_name)
-        except AttributeError:
-            raise ValueError(f"Could not load class {full_qualified_class_name}.")
+            return getattr(module_ref, class_name)
+        except AttributeError as e:
+            raise ValueError(
+                f"Could not load class `{class_name}` from module `{module_name_full}`.",
+            ) from e
 
     @staticmethod
     def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
@@ -1341,18 +1443,15 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-            client_side_incremental_sync = {
-                "date_time_based_cursor": self._create_component_from_model(
-                    model=model.incremental_sync, config=config
-                ),
-                "substream_cursor": (
-                    combined_slicers
-                    if isinstance(
-                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-                    )
-                    else None
-                ),
-            }
+            cursor = (
+                combined_slicers
+                if isinstance(
+                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                )
+                else self._create_component_from_model(model=model.incremental_sync, config=config)
+            )
+
+            client_side_incremental_sync = {"cursor": cursor}
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
@@ -2199,7 +2298,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ):
+        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
--- a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
+++ b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.stream_slicer.get_request_headers,
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
--- a/airbyte_cdk/sources/streams/concurrent/cursor.py
+++ b/airbyte_cdk/sources/streams/concurrent/cursor.py
@@ -196,7 +196,9 @@ class ConcurrentCursor(Cursor):
 
     @property
     def state(self) -> MutableMapping[str, Any]:
-        return self._concurrent_state
+        return self._connector_state_converter.convert_to_state_message(
+            self.cursor_field, self._concurrent_state
+        )
 
     @property
     def cursor_field(self) -> CursorField:
@@ -241,10 +243,10 @@ class ConcurrentCursor(Cursor):
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
     def close_partition(self, partition: Partition) -> None:
-        slice_count_before = len(self.state.get("slices", []))
+        slice_count_before = len(self._concurrent_state.get("slices", []))
         self._add_slice_to_state(partition)
         if slice_count_before < len(
-            self.state["slices"]
+            self._concurrent_state["slices"]
         ):  # only emit if at least one slice has been processed
             self._merge_partitions()
             self._emit_state_message()
@@ -256,11 +258,11 @@ class ConcurrentCursor(Cursor):
         )
 
         if self._slice_boundary_fields:
-            if "slices" not in self.state:
+            if "slices" not in self._concurrent_state:
                 raise RuntimeError(
                     f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                 )
-            self.state["slices"].append(
+            self._concurrent_state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self._extract_from_slice(
                         partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -288,7 +290,7 @@ class ConcurrentCursor(Cursor):
                     "expected. Please contact the Airbyte team."
                 )
 
-            self.state["slices"].append(
+            self._concurrent_state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self.start,
                     self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -300,9 +302,7 @@ class ConcurrentCursor(Cursor):
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
-            self._connector_state_converter.convert_to_state_message(
-                self._cursor_field, self.state
-            ),
+            self.state,
         )
         state_message = self._connector_state_manager.create_state_message(
             self._stream_name, self._stream_namespace
@@ -310,7 +310,9 @@ class ConcurrentCursor(Cursor):
         self._message_repository.emit_message(state_message)
 
     def _merge_partitions(self) -> None:
-        self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
+        self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
+            self._concurrent_state["slices"]
+        )
 
     def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
         try:
@@ -347,36 +349,42 @@ class ConcurrentCursor(Cursor):
         if self._start is not None and self._is_start_before_first_slice():
             yield from self._split_per_slice_range(
                 self._start,
-                self.state["slices"][0][self._connector_state_converter.START_KEY],
+                self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
                 False,
             )
 
-        if len(self.state["slices"]) == 1:
+        if len(self._concurrent_state["slices"]) == 1:
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self.state["slices"][0][self._connector_state_converter.END_KEY]
+                    self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
             )
-        elif len(self.state["slices"]) > 1:
-            for i in range(len(self.state["slices"]) - 1):
+        elif len(self._concurrent_state["slices"]) > 1:
+            for i in range(len(self._concurrent_state["slices"]) - 1):
                 if self._cursor_granularity:
                     yield from self._split_per_slice_range(
-                        self.state["slices"][i][self._connector_state_converter.END_KEY]
+                        self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
                         + self._cursor_granularity,
-                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
+                        self._concurrent_state["slices"][i + 1][
+                            self._connector_state_converter.START_KEY
+                        ],
                        False,
                    )
                else:
                    yield from self._split_per_slice_range(
-                        self.state["slices"][i][self._connector_state_converter.END_KEY],
-                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
+                        self._concurrent_state["slices"][i][
+                            self._connector_state_converter.END_KEY
+                        ],
+                        self._concurrent_state["slices"][i + 1][
+                            self._connector_state_converter.START_KEY
+                        ],
                        False,
                    )
            yield from self._split_per_slice_range(
                self._calculate_lower_boundary_of_last_slice(
-                    self.state["slices"][-1][self._connector_state_converter.END_KEY]
+                    self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
                ),
                self._end_provider(),
                True,
@@ -387,7 +395,8 @@ class ConcurrentCursor(Cursor):
     def _is_start_before_first_slice(self) -> bool:
         return (
             self._start is not None
-            and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
+            and self._start
+            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
         )
 
     def _calculate_lower_boundary_of_last_slice(
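For orientation (not part of the diff): the net effect of the hunks above is that `ConcurrentCursor.state` now returns the converted, sequential state-message format, while internal bookkeeping reads `_concurrent_state` directly. A sketch of the two shapes, assuming a datetime cursor field named `last_updated` (key names and values here are illustrative):

```python
# Internal concurrent format, still held in self._concurrent_state.
internal_format = {
    "state_type": "date-range",
    "slices": [{"start": "2023-01-01T00:00:00Z", "end": "2023-05-27T00:00:00Z"}],
}

# Sequential format now returned by the `state` property, matching the
# {cursor_field: cursor_value} shape that ConcurrentPerPartitionCursor expects.
sequential_format = {"last_updated": "2023-05-27T00:00:00Z"}
```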
--- a/airbyte_cdk/test/utils/manifest_only_fixtures.py
+++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py
@@ -4,7 +4,6 @@
 import importlib.util
 from pathlib import Path
 from types import ModuleType
-from typing import Optional
 
 import pytest
 
@@ -30,7 +29,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:
 
 
 @pytest.fixture(scope="session")
-def components_module(connector_dir: Path) -> Optional[ModuleType]:
+def components_module(connector_dir: Path) -> ModuleType | None:
     """Load and return the components module from the connector directory.
 
     This assumes the components module is located at <connector_dir>/components.py.
--- a/airbyte_cdk-6.22.0.dist-info/METADATA
+++ b/airbyte_cdk-6.23.0.dev0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: airbyte-cdk
-Version: 6.22.0
+Version: 6.23.0.dev0
 Summary: A framework for writing Airbyte Connectors.
 License: MIT
 Keywords: airbyte,connector-development-kit,cdk
@@ -23,7 +23,7 @@ Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
 Requires-Dist: Unidecode (>=1.3,<2.0)
 Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
-Requires-Dist: avro (>=1.11.2,<1.
+Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
 Requires-Dist: backoff
 Requires-Dist: cachetools
 Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
@@ -1,13 +1,13 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=52uncJvDQNHvwKxaqzXgnMYTptIl65LDJr2fvlk8-DU,11707
|
2
2
|
airbyte_cdk/cli/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
|
3
3
|
airbyte_cdk/cli/source_declarative_manifest/__init__.py,sha256=-0ST722Nj65bgRokzpzPkD1NBBW5CytEHFUe38cB86Q,91
|
4
|
-
airbyte_cdk/cli/source_declarative_manifest/_run.py,sha256=
|
4
|
+
airbyte_cdk/cli/source_declarative_manifest/_run.py,sha256=dMNFuS_z3irzN8IoHj0o155Oeud1E0rMuNAD3jyY1Q8,8303
|
5
5
|
airbyte_cdk/cli/source_declarative_manifest/spec.json,sha256=Earc1L6ngcdIr514oFQlUoOxdF4RHqtUyStSIAquXdY,554
|
6
6
|
airbyte_cdk/config_observation.py,sha256=7SSPxtN0nXPkm4euGNcTTr1iLbwUL01jy-24V1Hzde0,3986
|
7
7
|
airbyte_cdk/connector.py,sha256=bO23kdGRkl8XKFytOgrrWFc_VagteTHVEF6IsbizVkM,4224
|
8
8
|
airbyte_cdk/connector_builder/README.md,sha256=Hw3wvVewuHG9-QgsAq1jDiKuLlStDxKBz52ftyNRnBw,1665
|
9
9
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
10
|
-
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=
|
10
|
+
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=umB60OXyrCxzbO3qWMy870YRlnubmIiPG76ZoP8Hq_s,4255
|
11
11
|
airbyte_cdk/connector_builder/main.py,sha256=ubAPE0Oo5gjZOa-KMtLLJQkc8_inUpFR3sIb2DEh2No,3722
|
12
12
|
airbyte_cdk/connector_builder/message_grouper.py,sha256=Xckskpqe9kbUByaKVmPsfTKxuyI2FHt8k4NZ4p8xo_I,19813
|
13
13
|
airbyte_cdk/connector_builder/models.py,sha256=uCHpOdJx2PyZtIqk-mt9eSVuFMQoEqrW-9sjCz0Z-AQ,1500
|
@@ -63,7 +63,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=wbfk5uduLnEgdwJrKxKvK7TpGGIpsOxMGi1lOniipLA,25577
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -83,15 +83,16 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrq
 airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
 airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
 airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
-airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=
+airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
 airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
 airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
-airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=
+airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
+airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=K7201hcQbogtWZCy85gmluWB564_8sg_8MvrAfoy7MA,14466
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
-airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=
-airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=
+airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
+airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=_FSJjAwL4Zu-i2CngnhTtx8j-NPVSBKj5LwDSPta3Cg,16305
 airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
 airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
 airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -103,17 +104,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
 airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
 airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=Y5AWYxbJTUtJ_Jm7DV9qrZDiymFR9LST7fBt4piT2-U,4585
-airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=aMB7lmbXLhgAHcx-pIX4r0BSe9rJGebLTDh7hxlC6bA,16837
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
 airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=Fv6D9D5hyYhjCWfeIPpyeFWQakMsIsoBbqosSHLHmEs,96909
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
+airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_ZOJUlDDdNr9Krosgi2bCKGx2Z765M2Woz18,5505
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=nVso02_fdZXK6bl-8u9LazoIsfkDuFe6mQTjkAWkH3s,117764
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -165,7 +167,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=sa99VqU1U45fgZL2qEdw8ueX1tPTPfGxibQ-ZFePjSM,9361
@@ -260,7 +262,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
 airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
 airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
 airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
-airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=
+airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=suObbNi24so8Wcj0Wm32OkJAcuvODAOwp373YBmUPp0,21213
 airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
 airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
 airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -328,7 +330,7 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
 airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
 airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
 airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
-airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=
+airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=7HqCmsfNaAIjq2o9V9f-rgQdksncDZFfMifQpFzlLXo,2104
 airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
 airbyte_cdk/utils/__init__.py,sha256=qhnC02DbS35OY8oB_tkYHwZzHed2FZeBM__G8IOgckY,347
 airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
@@ -346,8 +348,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
+airbyte_cdk-6.23.0.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.23.0.dev0.dist-info/METADATA,sha256=Dv07oYfuxMAjky8DRjj5e5a6TWDJK-SXDIAIQk86X3k,6001
+airbyte_cdk-6.23.0.dev0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+airbyte_cdk-6.23.0.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.23.0.dev0.dist-info/RECORD,,
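Each RECORD line above follows the wheel convention `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing `=` padding stripped (PEP 376/427); the `RECORD,,` entry itself carries no hash. A minimal sketch for reproducing an entry locally — `record_entry` is an illustrative helper, not a CDK API:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: Path) -> str:
    """Build one wheel RECORD line for a file on disk."""
    data = path.read_bytes()
    digest = hashlib.sha256(data).digest()
    # Wheel RECORD files use the urlsafe alphabet and drop the '=' padding.
    encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
    return f"{path.as_posix()},sha256={encoded},{len(data)}"


# Example: compare a packaged file against its RECORD line, assuming the
# current directory is the installed package root.
print(record_entry(Path("airbyte_cdk/connector.py")))
```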
{airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/LICENSE.txt
File without changes
{airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/WHEEL
File without changes
{airbyte_cdk-6.22.0.dist-info → airbyte_cdk-6.23.0.dev0.dist-info}/entry_points.txt
File without changes