airbyte-cdk 6.60.16__py3-none-any.whl → 6.61.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +32 -36
- airbyte_cdk/connector_builder/main.py +3 -3
- airbyte_cdk/connector_builder/test_reader/helpers.py +24 -2
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +1 -1
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +15 -22
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +30 -18
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +73 -3
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +58 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +3 -5
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +10 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +42 -4
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py +2 -2
- airbyte_cdk/sources/message/concurrent_repository.py +47 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +23 -7
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +46 -5
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +7 -1
- airbyte_cdk/sources/streams/http/http_client.py +4 -1
- airbyte_cdk/sources/utils/slice_logger.py +4 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/RECORD +24 -23
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/connector_builder/connector_builder_handler.py
@@ -3,8 +3,8 @@
 #
 
 
-from dataclasses import asdict
-from typing import Any,
+from dataclasses import asdict
+from typing import Any, Dict, List, Mapping, Optional
 
 from airbyte_cdk.connector_builder.test_reader import TestReader
 from airbyte_cdk.models import (
@@ -15,45 +15,32 @@ from airbyte_cdk.models import (
     Type,
 )
 from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
+    ConcurrentDeclarativeSource,
+    TestLimits,
+)
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
-from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
-    ModelToComponentFactory,
-)
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
 from airbyte_cdk.utils.datetime_helpers import ab_datetime_now
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
-DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
-DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
-DEFAULT_MAXIMUM_RECORDS = 100
-DEFAULT_MAXIMUM_STREAMS = 100
-
 MAX_PAGES_PER_SLICE_KEY = "max_pages_per_slice"
 MAX_SLICES_KEY = "max_slices"
 MAX_RECORDS_KEY = "max_records"
 MAX_STREAMS_KEY = "max_streams"
 
 
-@dataclass
-class TestLimits:
-    __test__: ClassVar[bool] = False  # Tell Pytest this is not a Pytest class, despite its name
-
-    max_records: int = field(default=DEFAULT_MAXIMUM_RECORDS)
-    max_pages_per_slice: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE)
-    max_slices: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_SLICES)
-    max_streams: int = field(default=DEFAULT_MAXIMUM_STREAMS)
-
-
 def get_limits(config: Mapping[str, Any]) -> TestLimits:
     command_config = config.get("__test_read_config", {})
-
-        command_config.get(
+    return TestLimits(
+        max_records=command_config.get(MAX_RECORDS_KEY, TestLimits.DEFAULT_MAX_RECORDS),
+        max_pages_per_slice=command_config.get(
+            MAX_PAGES_PER_SLICE_KEY, TestLimits.DEFAULT_MAX_PAGES_PER_SLICE
+        ),
+        max_slices=command_config.get(MAX_SLICES_KEY, TestLimits.DEFAULT_MAX_SLICES),
+        max_streams=command_config.get(MAX_STREAMS_KEY, TestLimits.DEFAULT_MAX_STREAMS),
     )
-    max_slices = command_config.get(MAX_SLICES_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_SLICES
-    max_records = command_config.get(MAX_RECORDS_KEY) or DEFAULT_MAXIMUM_RECORDS
-    max_streams = command_config.get(MAX_STREAMS_KEY) or DEFAULT_MAXIMUM_STREAMS
-    return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams)
 
 
 def should_migrate_manifest(config: Mapping[str, Any]) -> bool:
@@ -75,21 +62,30 @@ def should_normalize_manifest(config: Mapping[str, Any]) -> bool:
     return config.get("__should_normalize", False)
 
 
-def create_source(
+def create_source(
+    config: Mapping[str, Any],
+    limits: TestLimits,
+    catalog: Optional[ConfiguredAirbyteCatalog],
+    state: Optional[List[AirbyteStateMessage]],
+) -> ConcurrentDeclarativeSource[Optional[List[AirbyteStateMessage]]]:
     manifest = config["__injected_declarative_manifest"]
-
+
+    # We enforce a concurrency level of 1 so that the stream is processed on a single thread
+    # to retain ordering for the grouping of the builder message responses.
+    if "concurrency_level" in manifest:
+        manifest["concurrency_level"]["default_concurrency"] = 1
+    else:
+        manifest["concurrency_level"] = {"type": "ConcurrencyLevel", "default_concurrency": 1}
+
+    return ConcurrentDeclarativeSource(
+        catalog=catalog,
         config=config,
-
+        state=state,
         source_config=manifest,
+        emit_connector_builder_messages=True,
        migrate_manifest=should_migrate_manifest(config),
        normalize_manifest=should_normalize_manifest(config),
-
-            emit_connector_builder_messages=True,
-            limit_pages_fetched_per_slice=limits.max_pages_per_slice,
-            limit_slices_fetched=limits.max_slices,
-            disable_retries=True,
-            disable_cache=True,
-        ),
+        limits=limits,
     )
 
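The create_source() rewrite above is the heart of this file's change: the builder now instantiates ConcurrentDeclarativeSource directly and pins it to a single worker so that message grouping stays ordered. A minimal sketch of that concurrency pinning, runnable on a plain dict (the manifest contents here are placeholders, not a real declarative manifest):

# Placeholder manifest; a real one carries the full declarative definition.
manifest = {"version": "6.0.0", "streams": []}

# Same logic as the new create_source(): force a single worker thread.
if "concurrency_level" in manifest:
    manifest["concurrency_level"]["default_concurrency"] = 1
else:
    manifest["concurrency_level"] = {"type": "ConcurrencyLevel", "default_concurrency": 1}

assert manifest["concurrency_level"]["default_concurrency"] == 1
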
airbyte_cdk/connector_builder/main.py
@@ -91,12 +91,12 @@ def handle_connector_builder_request(
 def handle_request(args: List[str]) -> str:
     command, config, catalog, state = get_config_and_catalog_from_args(args)
     limits = get_limits(config)
-    source = create_source(config, limits)
-    return orjson.dumps(
+    source = create_source(config=config, limits=limits, catalog=catalog, state=state)
+    return orjson.dumps(  # type: ignore[no-any-return]  # Serializer.dump() always returns AirbyteMessage
         AirbyteMessageSerializer.dump(
             handle_connector_builder_request(source, command, config, catalog, state, limits)
         )
-    ).decode()
+    ).decode()
 
 
 if __name__ == "__main__":

airbyte_cdk/connector_builder/test_reader/helpers.py
@@ -5,7 +5,7 @@
 import json
 from copy import deepcopy
 from json import JSONDecodeError
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any, Dict, List, Mapping, Optional, Union
 
 from airbyte_cdk.connector_builder.models import (
     AuxiliaryRequest,
@@ -17,6 +17,8 @@ from airbyte_cdk.connector_builder.models import (
 from airbyte_cdk.models import (
     AirbyteLogMessage,
     AirbyteMessage,
+    AirbyteStateBlob,
+    AirbyteStateMessage,
     OrchestratorType,
     TraceType,
 )
@@ -466,7 +468,7 @@ def handle_current_slice(
     return StreamReadSlices(
         pages=current_slice_pages,
         slice_descriptor=current_slice_descriptor,
-        state=[latest_state_message] if latest_state_message else [],
+        state=[convert_state_blob_to_mapping(latest_state_message)] if latest_state_message else [],
         auxiliary_requests=auxiliary_requests if auxiliary_requests else [],
     )
 
@@ -718,3 +720,23 @@ def get_auxiliary_request_type(stream: dict, http: dict) -> str:  # type: ignore
     Determines the type of the auxiliary request based on the stream and HTTP properties.
     """
     return "PARENT_STREAM" if stream.get("is_substream", False) else str(http.get("type", None))
+
+
+def convert_state_blob_to_mapping(
+    state_message: Union[AirbyteStateMessage, Dict[str, Any]],
+) -> Dict[str, Any]:
+    """
+    The AirbyteStreamState stores state as an AirbyteStateBlob which deceivingly is not
+    a dictionary, but rather a list of kwargs fields. This in turn causes it to not be
+    properly turned into a dictionary when translating this back into response output
+    by the connector_builder_handler using asdict()
+    """
+
+    if isinstance(state_message, AirbyteStateMessage) and state_message.stream:
+        state_value = state_message.stream.stream_state
+        if isinstance(state_value, AirbyteStateBlob):
+            state_value_mapping = {k: v for k, v in state_value.__dict__.items()}
+            state_message.stream.stream_state = state_value_mapping  # type: ignore  # we intentionally set this as a Dict so that StreamReadSlices is translated properly in the resulting HTTP response
+        return state_message  # type: ignore  # See above, but when this is an AirbyteStateMessage we must convert AirbyteStateBlob to a Dict
+    else:
+        return state_message  # type: ignore  # This is guaranteed to be a Dict since we check isinstance AirbyteStateMessage above
airbyte_cdk/connector_builder/test_reader/message_grouper.py
@@ -95,7 +95,7 @@ def get_message_groups(
     latest_state_message: Optional[Dict[str, Any]] = None
     slice_auxiliary_requests: List[AuxiliaryRequest] = []
 
-    while
+    while message := next(messages, None):
         json_message = airbyte_message_to_json(message)
 
         if is_page_http_request_for_different_stream(json_message, stream_name):

airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 import logging
+import os
 from typing import Dict, Iterable, List, Optional, Set
 
 from airbyte_cdk.exception_handler import generate_failed_streams_error_message
@@ -95,11 +96,14 @@ class ConcurrentReadProcessor:
         """
         stream_name = partition.stream_name()
         self._streams_to_running_partitions[stream_name].add(partition)
+        cursor = self._stream_name_to_instance[stream_name].cursor
         if self._slice_logger.should_log_slice_message(self._logger):
             self._message_repository.emit_message(
                 self._slice_logger.create_slice_log_message(partition.to_slice())
             )
-        self._thread_pool_manager.submit(
+        self._thread_pool_manager.submit(
+            self._partition_reader.process_partition, partition, cursor
+        )
 
     def on_partition_complete_sentinel(
         self, sentinel: PartitionCompleteSentinel
@@ -112,26 +116,16 @@ class ConcurrentReadProcessor:
         """
         partition = sentinel.partition
 
-
-
-
-
-
-
-
-
-
-
-            partitions_running = self._streams_to_running_partitions[partition.stream_name()]
-            if partition in partitions_running:
-                partitions_running.remove(partition)
-                # If all partitions were generated and this was the last one, the stream is done
-                if (
-                    partition.stream_name() not in self._streams_currently_generating_partitions
-                    and len(partitions_running) == 0
-                ):
-                    yield from self._on_stream_is_done(partition.stream_name())
-            yield from self._message_repository.consume_queue()
+        partitions_running = self._streams_to_running_partitions[partition.stream_name()]
+        if partition in partitions_running:
+            partitions_running.remove(partition)
+            # If all partitions were generated and this was the last one, the stream is done
+            if (
+                partition.stream_name() not in self._streams_currently_generating_partitions
+                and len(partitions_running) == 0
+            ):
+                yield from self._on_stream_is_done(partition.stream_name())
+        yield from self._message_repository.consume_queue()
 
     def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
         """
@@ -160,7 +154,6 @@ class ConcurrentReadProcessor:
                 stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
             )
         self._record_counter[stream.name] += 1
-        stream.cursor.observe(record)
         yield message
         yield from self._message_repository.consume_queue()
 
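The net effect of these hunks: cursor bookkeeping moves off the main thread. on_partition() now resolves the stream's cursor and hands it to process_partition(), and on_record() drops its stream.cursor.observe(record) call. A condensed model of the new worker-side flow (toy signature, not the CDK class itself):

from queue import Queue

def process_partition(queue: Queue, partition, cursor) -> None:
    # Worker thread: each record is queued for the main thread, and the cursor
    # observes it here; the partition is also closed on this thread.
    for record in partition.read():
        queue.put(record)
        cursor.observe(record)
    cursor.close_partition(partition)
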
airbyte_cdk/sources/concurrent_source/concurrent_source.py
@@ -1,10 +1,11 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
 import concurrent
 import logging
 from queue import Queue
-from typing import Iterable, Iterator, List
+from typing import Iterable, Iterator, List, Optional
 
 from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
@@ -16,7 +17,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
-from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
+from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionLogger, PartitionReader
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.types import (
     PartitionCompleteSentinel,
@@ -43,6 +44,7 @@ class ConcurrentSource:
         logger: logging.Logger,
         slice_logger: SliceLogger,
         message_repository: MessageRepository,
+        queue: Optional[Queue[QueueItem]] = None,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
     ) -> "ConcurrentSource":
         is_single_threaded = initial_number_of_partitions_to_generate == 1 and num_workers == 1
@@ -59,12 +61,13 @@ class ConcurrentSource:
             logger,
         )
         return ConcurrentSource(
-            threadpool,
-            logger,
-            slice_logger,
-
-
-
+            threadpool=threadpool,
+            logger=logger,
+            slice_logger=slice_logger,
+            queue=queue,
+            message_repository=message_repository,
+            initial_number_partitions_to_generate=initial_number_of_partitions_to_generate,
+            timeout_seconds=timeout_seconds,
         )
 
     def __init__(
@@ -72,6 +75,7 @@ class ConcurrentSource:
         threadpool: ThreadPoolManager,
         logger: logging.Logger,
         slice_logger: SliceLogger = DebugSliceLogger(),
+        queue: Optional[Queue[QueueItem]] = None,
         message_repository: MessageRepository = InMemoryMessageRepository(),
         initial_number_partitions_to_generate: int = 1,
         timeout_seconds: int = DEFAULT_TIMEOUT_SECONDS,
@@ -91,25 +95,28 @@ class ConcurrentSource:
         self._initial_number_partitions_to_generate = initial_number_partitions_to_generate
         self._timeout_seconds = timeout_seconds
 
+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        self._queue = queue or Queue(maxsize=10_000)
+
     def read(
         self,
         streams: List[AbstractStream],
     ) -> Iterator[AirbyteMessage]:
         self._logger.info("Starting syncing")
-
-        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
-        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
-        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
-        # information and might even need to be configurable depending on the source
-        queue: Queue[QueueItem] = Queue(maxsize=10_000)
         concurrent_stream_processor = ConcurrentReadProcessor(
             streams,
-            PartitionEnqueuer(
+            PartitionEnqueuer(self._queue, self._threadpool),
             self._threadpool,
             self._logger,
             self._slice_logger,
             self._message_repository,
-            PartitionReader(
+            PartitionReader(
+                self._queue,
+                PartitionLogger(self._slice_logger, self._logger, self._message_repository),
+            ),
         )
 
         # Enqueue initial partition generation tasks
@@ -117,7 +124,7 @@ class ConcurrentSource:
 
         # Read from the queue until all partitions were generated and read
         yield from self._consume_from_queue(
-
+            self._queue,
             concurrent_stream_processor,
         )
         self._threadpool.check_for_errors_and_shutdown()
@@ -141,7 +148,10 @@ class ConcurrentSource:
                 airbyte_message_or_record_or_exception,
                 concurrent_stream_processor,
             )
-
+            # In the event that a partition raises an exception, anything remaining in
+            # the queue will be missed because is_done() can raise an exception and exit
+            # out of this loop before remaining items are consumed
+            if queue.empty() and concurrent_stream_processor.is_done():
                 # all partitions were generated and processed. we're done here
                 break
 
@@ -161,5 +171,7 @@ class ConcurrentSource:
             yield from concurrent_stream_processor.on_partition_complete_sentinel(queue_item)
         elif isinstance(queue_item, Record):
             yield from concurrent_stream_processor.on_record(queue_item)
+        elif isinstance(queue_item, AirbyteMessage):
+            yield queue_item
         else:
             raise ValueError(f"Unknown queue item type: {type(queue_item)}")

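The reordered loop condition matters: queue.empty() is now checked before is_done(), so items enqueued before a partition failure are still drained; is_done() may re-raise that failure and end the loop. A toy reproduction of the ordering (stand-in objects, not the CDK classes):

from queue import Queue

q: Queue = Queue()
for item in ("record-1", "record-2"):
    q.put(item)

def is_done() -> bool:
    # Stand-in for ConcurrentReadProcessor.is_done(), which can re-raise a
    # partition failure instead of returning normally.
    raise RuntimeError("partition failed")

seen = []
while True:
    seen.append(q.get())
    # Checking q.empty() first guarantees everything already queued is consumed
    # before is_done() gets a chance to raise.
    if q.empty():
        try:
            if is_done():
                break
        except RuntimeError:
            break

assert seen == ["record-1", "record-2"]
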
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -3,7 +3,22 @@
 #
 
 import logging
-from
+from dataclasses import dataclass, field
+from queue import Queue
+from typing import (
+    Any,
+    ClassVar,
+    Generic,
+    Iterator,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Union,
+)
+
+from airbyte_protocol_dataclasses.models import Level
 
 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -43,6 +58,8 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
     StreamSlicerPartitionGenerator,
 )
 from airbyte_cdk.sources.declarative.types import ConnectionDefinition
+from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository
+from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.source import TState
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
@@ -50,6 +67,22 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import Abstra
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
+
+
+@dataclass
+class TestLimits:
+    __test__: ClassVar[bool] = False  # Tell Pytest this is not a Pytest class, despite its name
+
+    DEFAULT_MAX_PAGES_PER_SLICE: ClassVar[int] = 5
+    DEFAULT_MAX_SLICES: ClassVar[int] = 5
+    DEFAULT_MAX_RECORDS: ClassVar[int] = 100
+    DEFAULT_MAX_STREAMS: ClassVar[int] = 100
+
+    max_records: int = field(default=DEFAULT_MAX_RECORDS)
+    max_pages_per_slice: int = field(default=DEFAULT_MAX_PAGES_PER_SLICE)
+    max_slices: int = field(default=DEFAULT_MAX_SLICES)
+    max_streams: int = field(default=DEFAULT_MAX_STREAMS)
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -65,7 +98,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         source_config: ConnectionDefinition,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
-
+        migrate_manifest: bool = False,
+        normalize_manifest: bool = False,
+        limits: Optional[TestLimits] = None,
         config_path: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
@@ -73,21 +108,39 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         # no longer needs to store the original incoming state. But maybe there's an edge case?
         self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
 
+        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
+        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
+        # partitions which would fill the queue. This number is arbitrarily set to 10_000 but will probably need to be changed given more
+        # information and might even need to be configurable depending on the source
+        queue: Queue[QueueItem] = Queue(maxsize=10_000)
+        message_repository = InMemoryMessageRepository(
+            Level.DEBUG if emit_connector_builder_messages else Level.INFO
+        )
+
         # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
         # cursors. We do this by no longer automatically instantiating RFR cursors when converting
         # the declarative models into runtime components. Concurrent sources will continue to checkpoint
         # incremental streams running in full refresh.
-        component_factory =
+        component_factory = ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
+            message_repository=ConcurrentMessageRepository(queue, message_repository),
             connector_state_manager=self._connector_state_manager,
             max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
+            limit_pages_fetched_per_slice=limits.max_pages_per_slice if limits else None,
+            limit_slices_fetched=limits.max_slices if limits else None,
+            disable_retries=True if limits else False,
+            disable_cache=True if limits else False,
         )
 
+        self._limits = limits
+
         super().__init__(
             source_config=source_config,
             config=config,
             debug=debug,
             emit_connector_builder_messages=emit_connector_builder_messages,
+            migrate_manifest=migrate_manifest,
+            normalize_manifest=normalize_manifest,
             component_factory=component_factory,
             config_path=config_path,
         )
@@ -117,6 +170,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate,
             logger=self.logger,
             slice_logger=self._slice_logger,
+            queue=queue,
             message_repository=self.message_repository,
         )
 
@@ -280,8 +334,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             schema_loader=declarative_stream._schema_loader,  # type: ignore  # We are accessing the private property but the public one is optional and we will remove this code soonish
                             retriever=retriever,
                             message_repository=self.message_repository,
+                            max_records_limit=self._limits.max_records
+                            if self._limits
+                            else None,
                         ),
                         stream_slicer=declarative_stream.retriever.stream_slicer,
+                        slice_limit=self._limits.max_slices
+                        if self._limits
+                        else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
                     )
                 else:
                     if (
@@ -311,8 +371,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             schema_loader=declarative_stream._schema_loader,  # type: ignore  # We are accessing the private property but the public one is optional and we will remove this code soonish
                             retriever=retriever,
                             message_repository=self.message_repository,
+                            max_records_limit=self._limits.max_records
+                            if self._limits
+                            else None,
                         ),
                         stream_slicer=cursor,
+                        slice_limit=self._limits.max_slices if self._limits else None,
                     )
 
                 concurrent_streams.append(
@@ -341,8 +405,12 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore  # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=declarative_stream.retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records if self._limits else None,
                     ),
                     declarative_stream.retriever.stream_slicer,
+                    slice_limit=self._limits.max_slices
+                    if self._limits
+                    else None,  # technically not needed because create_declarative_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
                 )
 
                 final_state_cursor = FinalStateCursor(
@@ -401,8 +469,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         schema_loader=declarative_stream._schema_loader,  # type: ignore  # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self.message_repository,
+                        max_records_limit=self._limits.max_records if self._limits else None,
                     ),
                     perpartition_cursor,
+                    slice_limit=self._limits.max_slices if self._limits else None,
                 )
 
                 concurrent_streams.append(

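TestLimits, relocated here from connector_builder_handler.py, now carries its defaults as ClassVars, and the mere presence of a limits object flips the component factory into test-read mode. A short check of the defaults, assuming airbyte-cdk 6.61.1 is installed:

from airbyte_cdk.sources.declarative.concurrent_declarative_source import TestLimits

limits = TestLimits()
assert (limits.max_records, limits.max_streams) == (100, 100)
assert (limits.max_pages_per_slice, limits.max_slices) == (5, 5)

# The builder-mode flags in the diff derive from the presence of limits:
disable_retries = True if limits else False
assert disable_retries
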
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -987,6 +987,18 @@ definitions:
         - "%Y-%m-%d %H:%M:%S.%f+00:00"
         - "%s"
         - "%ms"
+      suggestions:
+        - "%Y-%m-%d"
+        - "%Y-%m-%d %H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%SZ"
+        - "%Y-%m-%dT%H:%M:%S%z"
+        - "%Y-%m-%dT%H:%M:%S.%fZ"
+        - "%Y-%m-%dT%H:%M:%S.%f%z"
+        - "%Y-%m-%d %H:%M:%S.%f+00:00"
+        - "%s"
+        - "%ms"
+        - "%s_as_float"
     start_datetime:
       title: Start Datetime
       description: The datetime that determines the earliest record that should be synced.
@@ -1061,6 +1073,18 @@ definitions:
         - "%s"
         - "%ms"
         - "%s_as_float"
+      suggestions:
+        - "%Y-%m-%d"
+        - "%Y-%m-%d %H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%SZ"
+        - "%Y-%m-%dT%H:%M:%S%z"
+        - "%Y-%m-%dT%H:%M:%S.%fZ"
+        - "%Y-%m-%dT%H:%M:%S.%f%z"
+        - "%Y-%m-%d %H:%M:%S.%f+00:00"
+        - "%s"
+        - "%ms"
+        - "%s_as_float"
     cursor_granularity:
       title: Cursor Granularity
       description: |
@@ -1075,6 +1099,13 @@ definitions:
       type: string
       examples:
         - "PT1S"
+      suggestions:
+        - "PT0.000001S"
+        - "PT0.001S"
+        - "PT1S"
+        - "PT1M"
+        - "PT1H"
+        - "P1D"
     is_data_feed:
       title: Data Feed API
       description: A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.
@@ -1133,6 +1164,12 @@ definitions:
       examples:
         - "P1W"
        - "{{ config['step_increment'] }}"
+      suggestions:
+        - "PT1H"
+        - "P1D"
+        - "P1W"
+        - "P1M"
+        - "P1Y"
     $parameters:
       type: object
       additionalProperties: true
@@ -2804,6 +2841,18 @@ definitions:
         - "%Y-%m-%dT%H:%M:%S.%f%z"
         - "%Y-%m-%d"
         - "%s"
+      suggestions:
+        - "%Y-%m-%d"
+        - "%Y-%m-%d %H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%S"
+        - "%Y-%m-%dT%H:%M:%SZ"
+        - "%Y-%m-%dT%H:%M:%S%z"
+        - "%Y-%m-%dT%H:%M:%S.%fZ"
+        - "%Y-%m-%dT%H:%M:%S.%f%z"
+        - "%Y-%m-%d %H:%M:%S.%f+00:00"
+        - "%s"
+        - "%ms"
+        - "%s_as_float"
     max_datetime:
       title: Max Datetime
       description: Ceiling applied on the datetime value. Must be formatted with the datetime_format field.
@@ -4142,11 +4191,9 @@ definitions:
         - stream_slice
         - stream_template_config
       examples:
-        - ["
-        - ["
-        - ["
-        - ["data", "{{ components_values.name }}"]
-        - ["data", "*", "record"]
+        - ["name"]
+        - ["retriever", "requester", "url"]
+        - ["retriever", "requester", "{{ components_values.field }}"]
         - ["*", "**", "name"]
       value:
         title: Value
@@ -4777,6 +4824,12 @@ interpolation:
   - title: stream_slice
     description: This variable is deprecated. Use stream_interval or stream_partition instead.
     type: object
+  - title: components_values
+    description: The record object produced by the components resolver for which a stream will be generated.
+    type: object
+    examples:
+      - name: "accounts"
+        id: 1234
   macros:
     - title: now_utc
       description: Returns the current date and time in the UTC timezone.

airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -1463,11 +1463,9 @@ class ComponentMappingDefinition(BaseModel):
         ...,
         description="A list of potentially nested fields indicating the full path where value will be added or updated.",
         examples=[
-            ["
-            ["
-            ["
-            ["data", "{{ components_values.name }}"],
-            ["data", "*", "record"],
+            ["name"],
+            ["retriever", "requester", "url"],
+            ["retriever", "requester", "{{ components_values.field }}"],
             ["*", "**", "name"],
         ],
         title="Field Path",

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -631,6 +631,10 @@ SCHEMA_TRANSFORMER_TYPE_MAPPING = {
     SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
 }
 
+# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
+# this would be a circular import
+MAX_SLICES = 5
+
 
 class ModelToComponentFactory:
     EPOCH_DATETIME_FORMAT = "%s"
@@ -2086,6 +2090,12 @@ class ModelToComponentFactory:
         elif concurrent_cursor:
             cursor = concurrent_cursor
 
+        # FIXME to be removed once we migrate everything to DefaultStream
+        if isinstance(retriever, SimpleRetriever):
+            # We zero it out here, but since this is a cursor reference, the state is still properly
+            # instantiated for the other components that reference it
+            retriever.cursor = None
+
         partition_generator = StreamSlicerPartitionGenerator(
             DeclarativePartitionFactory(
                 stream_name,

airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py
@@ -1,9 +1,12 @@
-# Copyright (c)
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 
-from typing import Any, Iterable, Mapping, Optional
+from typing import Any, Iterable, Mapping, Optional, cast
 
 from airbyte_cdk.sources.declarative.retrievers import Retriever
 from airbyte_cdk.sources.declarative.schema import SchemaLoader
+from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer_test_read_decorator import (
+    StreamSlicerTestReadDecorator,
+)
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -11,6 +14,11 @@ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import Stre
 from airbyte_cdk.sources.types import Record, StreamSlice
 from airbyte_cdk.utils.slice_hasher import SliceHasher
 
+# For Connector Builder test read operations, we track the total number of records
+# read for the stream at the global level so that we can stop reading early if we
+# exceed the record limit
+total_record_counter = 0
+
 
 class SchemaLoaderCachingDecorator(SchemaLoader):
     def __init__(self, schema_loader: SchemaLoader):
@@ -31,6 +39,7 @@ class DeclarativePartitionFactory:
         schema_loader: SchemaLoader,
         retriever: Retriever,
         message_repository: MessageRepository,
+        max_records_limit: Optional[int] = None,
     ) -> None:
         """
         The DeclarativePartitionFactory takes a retriever_factory and not a retriever directly. The reason is that our components are not
@@ -41,6 +50,7 @@ class DeclarativePartitionFactory:
         self._schema_loader = SchemaLoaderCachingDecorator(schema_loader)
         self._retriever = retriever
         self._message_repository = message_repository
+        self._max_records_limit = max_records_limit
 
     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
@@ -48,6 +58,7 @@ class DeclarativePartitionFactory:
             schema_loader=self._schema_loader,
             retriever=self._retriever,
             message_repository=self._message_repository,
+            max_records_limit=self._max_records_limit,
             stream_slice=stream_slice,
         )
 
@@ -59,19 +70,29 @@ class DeclarativePartition(Partition):
         schema_loader: SchemaLoader,
         retriever: Retriever,
         message_repository: MessageRepository,
+        max_records_limit: Optional[int],
         stream_slice: StreamSlice,
     ):
         self._stream_name = stream_name
         self._schema_loader = schema_loader
         self._retriever = retriever
         self._message_repository = message_repository
+        self._max_records_limit = max_records_limit
         self._stream_slice = stream_slice
         self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
 
     def read(self) -> Iterable[Record]:
+        if self._max_records_limit is not None:
+            global total_record_counter
+            if total_record_counter >= self._max_records_limit:
+                return
         for stream_data in self._retriever.read_records(
             self._schema_loader.get_json_schema(), self._stream_slice
         ):
+            if self._max_records_limit is not None:
+                if total_record_counter >= self._max_records_limit:
+                    break
+
             if isinstance(stream_data, Mapping):
                 record = (
                     stream_data
@@ -86,6 +107,9 @@ class DeclarativePartition(Partition):
             else:
                 self._message_repository.emit_message(stream_data)
 
+            if self._max_records_limit is not None:
+                total_record_counter += 1
+
     def to_slice(self) -> Optional[Mapping[str, Any]]:
         return self._stream_slice
 
@@ -98,10 +122,24 @@ class DeclarativePartition(Partition):
 
 class StreamSlicerPartitionGenerator(PartitionGenerator):
     def __init__(
-        self,
+        self,
+        partition_factory: DeclarativePartitionFactory,
+        stream_slicer: StreamSlicer,
+        slice_limit: Optional[int] = None,
+        max_records_limit: Optional[int] = None,
     ) -> None:
         self._partition_factory = partition_factory
-
+
+        if slice_limit:
+            self._stream_slicer = cast(
+                StreamSlicer,
+                StreamSlicerTestReadDecorator(
+                    wrapped_slicer=stream_slicer,
+                    maximum_number_of_slices=slice_limit,
+                ),
+            )
+        else:
+            self._stream_slicer = stream_slicer
 
     def generate(self) -> Iterable[Partition]:
         for stream_slice in self._stream_slicer.stream_slices():

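Note that total_record_counter is module-level state: every partition of a test read increments the same global, so max_records_limit caps the whole stream rather than each partition. A toy model of that behavior (stand-in function, not the CDK class):

total_record_counter = 0

def read_partition(records, max_records_limit):
    # Mirrors DeclarativePartition.read(): bail out early if the global cap is
    # already reached, and stop mid-partition once it is hit.
    global total_record_counter
    emitted = []
    if total_record_counter >= max_records_limit:
        return emitted
    for record in records:
        if total_record_counter >= max_records_limit:
            break
        emitted.append(record)
        total_record_counter += 1
    return emitted

assert read_partition(["a", "b"], 3) == ["a", "b"]
assert read_partition(["c", "d"], 3) == ["c"]  # global cap of 3 hit mid-partition
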
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py
@@ -4,10 +4,10 @@
 
 from dataclasses import dataclass
 from itertools import islice
-from typing import Any, Iterable
+from typing import Any, Iterable
 
 from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
-from airbyte_cdk.sources.types import StreamSlice
+from airbyte_cdk.sources.types import StreamSlice
 
 
 @dataclass

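This file only saw import churn, but it is now wired in by StreamSlicerPartitionGenerator above. From its imports (dataclass, islice) and the wrapped_slicer/maximum_number_of_slices call site, the decorator plausibly reduces to a sketch like this (hypothetical name, not the actual class body):

from dataclasses import dataclass
from itertools import islice
from typing import Any, Iterable

@dataclass
class SliceLimiter:
    # Wraps another slicer and truncates its slice stream, which is how the
    # builder's max_slices limit is plausibly enforced.
    wrapped_slicer: Any
    maximum_number_of_slices: int

    def stream_slices(self) -> Iterable[Any]:
        return islice(self.wrapped_slicer.stream_slices(), self.maximum_number_of_slices)
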
airbyte_cdk/sources/message/concurrent_repository.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+import logging
+import os
+from queue import Queue
+from typing import Callable, Iterable
+
+from airbyte_cdk.models import AirbyteMessage, Level
+from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.sources.message.repository import LogMessage, MessageRepository
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
+
+logger = logging.getLogger("airbyte")
+
+
+class ConcurrentMessageRepository(MessageRepository):
+    """
+    Message repository that immediately loads messages onto the queue processed on the
+    main thread. This ensures that messages are processed in the correct order they are
+    received. The InMemoryMessageRepository implementation does not have guaranteed
+    ordering since whether to process the main thread vs. partitions is non-deterministic
+    and there can be a lag between reading the main-thread and consuming messages on the
+    MessageRepository.
+
+    This is particularly important for the connector builder which relies on grouping
+    of messages to organize request/response, pages, and partitions.
+    """
+
+    def __init__(self, queue: Queue[QueueItem], message_repository: MessageRepository):
+        self._queue = queue
+        self._decorated_message_repository = message_repository
+
+    def emit_message(self, message: AirbyteMessage) -> None:
+        self._decorated_message_repository.emit_message(message)
+        for message in self._decorated_message_repository.consume_queue():
+            self._queue.put(message)
+
+    def log_message(self, level: Level, message_provider: Callable[[], LogMessage]) -> None:
+        self._decorated_message_repository.log_message(level, message_provider)
+        for message in self._decorated_message_repository.consume_queue():
+            self._queue.put(message)
+
+    def consume_queue(self) -> Iterable[AirbyteMessage]:
+        """
+        This method shouldn't need to be called because as part of emit_message() we are already
+        loading messages onto the queue processed on the main thread.
+        """
+        yield from []
airbyte_cdk/sources/streams/concurrent/cursor.py
@@ -4,6 +4,7 @@
 
 import functools
 import logging
+import threading
 from abc import ABC, abstractmethod
 from typing import (
     Any,
@@ -174,6 +175,12 @@ class ConcurrentCursor(Cursor):
         self._should_be_synced_logger_triggered = False
         self._clamping_strategy = clamping_strategy
 
+        # A lock is required when closing a partition because updating the cursor's concurrent_state is
+        # not thread safe. When multiple partitions are being closed by the cursor at the same time, it is
+        # possible for one partition to update concurrent_state after a second partition has already read
+        # the previous state. This can lead to the second partition overwriting the previous one's state.
+        self._lock = threading.Lock()
+
     @property
     def state(self) -> MutableMapping[str, Any]:
         return self._connector_state_converter.convert_to_state_message(
@@ -222,6 +229,14 @@ class ConcurrentCursor(Cursor):
         )
 
     def observe(self, record: Record) -> None:
+        # Because observe writes to the most_recent_cursor_value_per_partition mapping,
+        # it is not thread-safe. However, this shouldn't lead to concurrency issues because
+        # observe() is only invoked by PartitionReader.process_partition(). Since the map is
+        # broken down according to partition, concurrent threads processing only read/write
+        # from different keys which avoids any conflicts.
+        #
+        # If we were to add thread safety, we should implement a lock per-partition
+        # which is instantiated during stream_slices()
         most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
             record.associated_slice
         )
@@ -237,13 +252,14 @@ class ConcurrentCursor(Cursor):
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
     def close_partition(self, partition: Partition) -> None:
-
-
-
-
-
-
-
+        with self._lock:
+            slice_count_before = len(self._concurrent_state.get("slices", []))
+            self._add_slice_to_state(partition)
+            if slice_count_before < len(
+                self._concurrent_state["slices"]
+            ):  # only emit if at least one slice has been processed
+                self._merge_partitions()
+                self._emit_state_message()
         self._has_closed_at_least_one_slice = True
 
     def _add_slice_to_state(self, partition: Partition) -> None:

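The lock guards a classic read-modify-write race: two partitions closing concurrently can both read _concurrent_state before either writes back, silently dropping a slice. A toy illustration of the pattern, using a plain dict in place of the cursor's state:

import threading

state = {"slices": []}
lock = threading.Lock()

def close_partition(slice_boundaries) -> None:
    # Without the lock, two threads could both read the same list and one
    # append would overwrite the other when writing back.
    with lock:
        current = state["slices"]
        state["slices"] = current + [slice_boundaries]

threads = [threading.Thread(target=close_partition, args=((i, i + 1),)) for i in range(4)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
assert len(state["slices"]) == 4
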
airbyte_cdk/sources/streams/concurrent/partition_reader.py
@@ -1,14 +1,45 @@
-#
-
-
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+import logging
 from queue import Queue
+from typing import Optional
 
 from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
+from airbyte_cdk.sources.message.repository import MessageRepository
+from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.types import (
     PartitionCompleteSentinel,
     QueueItem,
 )
+from airbyte_cdk.sources.utils.slice_logger import SliceLogger
+
+
+# Since moving all the connector builder workflow to the concurrent CDK which required correct ordering
+# of grouping log messages onto the main write thread using the ConcurrentMessageRepository, this
+# separate flow and class that was used to log slices onto this partition's message_repository
+# should just be replaced by emitting messages directly onto the repository instead of an intermediary.
+class PartitionLogger:
+    """
+    Helper class that provides a mechanism for passing a log message onto the current
+    partitions message repository
+    """
+
+    def __init__(
+        self,
+        slice_logger: SliceLogger,
+        logger: logging.Logger,
+        message_repository: MessageRepository,
+    ):
+        self._slice_logger = slice_logger
+        self._logger = logger
+        self._message_repository = message_repository
+
+    def log(self, partition: Partition) -> None:
+        if self._slice_logger.should_log_slice_message(self._logger):
+            self._message_repository.emit_message(
+                self._slice_logger.create_slice_log_message(partition.to_slice())
+            )
 
 
 class PartitionReader:
@@ -18,13 +49,18 @@ class PartitionReader:
 
     _IS_SUCCESSFUL = True
 
-    def __init__(
+    def __init__(
+        self,
+        queue: Queue[QueueItem],
+        partition_logger: Optional[PartitionLogger] = None,
+    ) -> None:
         """
         :param queue: The queue to put the records in.
         """
         self._queue = queue
+        self._partition_logger = partition_logger
 
-    def process_partition(self, partition: Partition) -> None:
+    def process_partition(self, partition: Partition, cursor: Cursor) -> None:
         """
         Process a partition and put the records in the output queue.
         When all the partitions are added to the queue, a sentinel is added to the queue to indicate that all the partitions have been generated.
@@ -37,8 +73,13 @@ class PartitionReader:
         :return: None
         """
         try:
+            if self._partition_logger:
+                self._partition_logger.log(partition)
+
             for record in partition.read():
                 self._queue.put(record)
+                cursor.observe(record)
+            cursor.close_partition(partition)
             self._queue.put(PartitionCompleteSentinel(partition, self._IS_SUCCESSFUL))
         except Exception as e:
             self._queue.put(StreamThreadException(e, partition.stream_name()))

airbyte_cdk/sources/streams/concurrent/partitions/types.py
@@ -4,6 +4,7 @@
 
 from typing import Any, Union
 
+from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
     PartitionGenerationCompletedSentinel,
 )
@@ -34,5 +35,10 @@ class PartitionCompleteSentinel:
 Typedef representing the items that can be added to the ThreadBasedConcurrentStream
 """
 QueueItem = Union[
-    Record,
+    Record,
+    Partition,
+    PartitionCompleteSentinel,
+    PartitionGenerationCompletedSentinel,
+    Exception,
+    AirbyteMessage,
 ]

airbyte_cdk/sources/streams/http/http_client.py
@@ -153,7 +153,10 @@ class HttpClient:
             # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
             backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
             return CachedLimiterSession(
-                sqlite_path,
+                cache_name=sqlite_path,
+                backend=backend,
+                api_budget=self._api_budget,
+                match_headers=True,
             )
         else:
             return LimiterSession(api_budget=self._api_budget)

airbyte_cdk/sources/utils/slice_logger.py
@@ -11,6 +11,10 @@ from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
 from airbyte_cdk.models import Type as MessageType
 
 
+# Once everything runs on the concurrent CDK and we've cleaned up the legacy flows, we should try to remove
+# this class and write messages directly to the message_repository instead of through the logger because for
+# cases like the connector builder where ordering of messages is important, using the logger can cause
+# messages to be grouped out of order. Alas work for a different day.
 class SliceLogger(ABC):
     """
     SliceLogger is an interface that allows us to log slices of data in a uniform way.

{airbyte_cdk-6.60.16.dist-info → airbyte_cdk-6.61.1.dist-info}/RECORD
@@ -15,12 +15,12 @@ airbyte_cdk/config_observation.py,sha256=7SSPxtN0nXPkm4euGNcTTr1iLbwUL01jy-24V1H
 airbyte_cdk/connector.py,sha256=N6TUlrZOMjLAI85JrNAKkfyTqnO5xfBCw4oEfgjJd9o,4254
 airbyte_cdk/connector_builder/README.md,sha256=Hw3wvVewuHG9-QgsAq1jDiKuLlStDxKBz52ftyNRnBw,1665
 airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
-airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=
-airbyte_cdk/connector_builder/main.py,sha256=
+airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=jRtSfj3aca006-01Hax-THJpuoysd8QR6JPGnr8q1Xg,6371
+airbyte_cdk/connector_builder/main.py,sha256=F9bmdz252pvGXAdDgPwIOPw3fl5fwTU41uG49BQyItI,3883
 airbyte_cdk/connector_builder/models.py,sha256=9pIZ98LW_d6fRS39VdnUOf3cxGt4TkC5MJ0_OrzcCRk,1578
 airbyte_cdk/connector_builder/test_reader/__init__.py,sha256=iTwBMoI9vaJotEgpqZbFjlxRcbxXYypSVJ9YxeHk7wc,120
-airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=
-airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=
+airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=5GSrK9EVBDm5dwtudVbA-73EHh53-niRA-oj8eQVFHI,29236
+airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=bFoQMKCXJob98O6F4tgMW81cCquNOqCx2tkNXP7lPqc,7062
 airbyte_cdk/connector_builder/test_reader/reader.py,sha256=DugoqS6SMrtOJ--2Y0F0h_9x8m632i7fSOPMAA0JHnc,21654
 airbyte_cdk/connector_builder/test_reader/types.py,sha256=hPZG3jO03kBaPyW94NI3JHRS1jxXGSNBcN1HFzOxo5Y,2528
 airbyte_cdk/destinations/__init__.py,sha256=FyDp28PT_YceJD5HDFhA-mrGfX9AONIyMQ4d68CHNxQ,213
@@ -57,8 +57,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 airbyte_cdk/sources/__init__.py,sha256=45J83QsFH3Wky3sVapZWg4C58R_i1thm61M06t2c1AQ,1156
 airbyte_cdk/sources/abstract_source.py,sha256=50vxEBRByiNhT4WJkiFvgM-C6PWqKSJgvuNC_aeg2cw,15547
 airbyte_cdk/sources/concurrent_source/__init__.py,sha256=3D_RJsxQfiLboSCDdNei1Iv-msRp3DXsas6E9kl7dXc,386
-airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py,sha256=
-airbyte_cdk/sources/concurrent_source/concurrent_source.py,sha256=
+airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py,sha256=qUi9zRjSEOsu3k6niRFjVn4l29OOMZVstIUoqNLw4_k,12208
+airbyte_cdk/sources/concurrent_source/concurrent_source.py,sha256=2sdgOnWtvClZaibnfn4Yardv2Jjv_lfwMubublDl1ZY,8458
 airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py,sha256=f9PIRPWn2tXu0-bxVeYHL2vYdqCzZ_kgpHg5_Ep-cfQ,6103
 airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py,sha256=z1t-rAZBsqVidv2fpUlPHE9JgyXsITuGk4AMu96mXSQ,696
 airbyte_cdk/sources/concurrent_source/stream_thread_exception.py,sha256=-q6mG2145HKQ28rZGD1bUmjPlIZ1S7-Yhewl8Ntu6xI,764
@@ -86,11 +86,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=sV-ZY7dZ03V8GdAxPY
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=5nvL9wbcAnReCeXM1Krezvyh-Plgx_yPS-CVhO_thHY,31043
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=vA7Rt2XswBk027Hw61i-0A2sJW5ohptlCAI77xLoQkU,188222
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=qmyMnnet92eGc3C22yBtpvD5UZjqdhsAafP_zxI5wp8,1814
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
@@ -134,14 +134,14 @@ airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migrati
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
 airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py,sha256=Imnj3yef0aqRdLfaUxkIYISUb8YkiPrRH_wBd-x8HjM,5999
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=EjBqLoiNgExQcAtKLnUJ9t4vyhcRlDFXvr5ZS_VuAr4,132254
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=2UdpCz3yi7ISZTyqkQXSSy3dMxeyOWqV7OlAS5b9GVg,11568
 airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dh8lCF7ZyhXW-euR_v4MqoHY2zQsPOeSPA8Hm4Yu4h8,184820
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -220,9 +220,9 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
 airbyte_cdk/sources/declarative/spec/__init__.py,sha256=9FYO-fVOclrwjAW4qwRTbZRVopTc9rOaauAJfThdNCQ,177
 airbyte_cdk/sources/declarative/spec/spec.py,sha256=SwL_pfXZgcLYLJY-MAeFMHug9oYh2tOWjgG0C3DoLOY,3602
 airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=UX-cP_C-9FIFFPL9z8nuxu_rglssRsMOqQmQHN8FLB8,341
-airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=
+airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=P3LqmgprbkW-Fy7c57meWny7D66XHY1EFjDw0ZzZLJk,5704
 airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
-airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py,sha256=
+airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py,sha256=4vit5ADyhoZnd1psRVeM5jdySYzhjwspLVXxh8vt1M8,944
 airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
 airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=Eg1jQtRObgzxbtySTQs5uEZIjEklsoHFxYSPf78x6Ng,5420
 airbyte_cdk/sources/declarative/transformations/config_transformations/__init__.py,sha256=GaU3ezFa5opeDgdlNohX6TXsWJlOD2jOfJXQWeQCh7E,263
@@ -300,6 +300,7 @@ airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs
 airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
 airbyte_cdk/sources/http_logger.py,sha256=H93kPAujHhPmXNX0JSFG3D-SL6yEFA5PtKot9Hu3TYA,1690
 airbyte_cdk/sources/message/__init__.py,sha256=y98fzHsQBwXwp2zEa4K5mxGFqjnx9lDn9O0pTk-VS4U,395
+airbyte_cdk/sources/message/concurrent_repository.py,sha256=HPMjhz5hEhjoaatjPL0Jo5VEI26B4fuo-ESIM0zIhGI,2113
 airbyte_cdk/sources/message/repository.py,sha256=SG7avgti_-dj8FcRHTTrhgLLGJbElv14_zIB0SH8AIc,4763
 airbyte_cdk/sources/source.py,sha256=KIBBH5VLEb8BZ8B9aROlfaI6OLoJqKDPMJ10jkAR7nk,3611
 airbyte_cdk/sources/specs/transfer_modes.py,sha256=sfSVO0yT6SaGKN5_TP0Nl_ftG0yPhecaBv0WkhAEXA8,932
@@ -319,18 +320,18 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCB
 airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=h4ZewhWn2PzPTt0lZZjcUL4rrpW9E_of7prnI3bm-c4,14004
 airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=M0XmvF3vjlr4GbCM0XH1hAj7udiAONM9SnmXjqufzLM,1035
 airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
-airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=
+airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Dxjx4IAHZ6HHyfJ-B5SUTTYgdb1ZiiBKsZm3pYUquzk,23411
 airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
|
324
325
|
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=SSufbo5f7OOYS8DZaABXeJVvodcfp9wb8J9lT5Xik3s,4744
|
325
326
|
airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
|
326
327
|
airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
|
327
328
|
airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=2t64b_z9cEPmlHZnjSiMTO8PEtEdiAJDG0JcYOtUqAE,3363
|
328
|
-
airbyte_cdk/sources/streams/concurrent/partition_reader.py,sha256=
|
329
|
+
airbyte_cdk/sources/streams/concurrent/partition_reader.py,sha256=qKd1NBlAjwo4mRVq9bowWr7oYg8SpgISklpkjUkie4k,3434
|
329
330
|
airbyte_cdk/sources/streams/concurrent/partitions/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
330
331
|
airbyte_cdk/sources/streams/concurrent/partitions/partition.py,sha256=CmaRcKn8y118No3qvbRV9DBeAUKv17lrVgloR4Y9TwU,1490
|
331
332
|
airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py,sha256=_ymkkBr71_qt1fW0_MUqw96OfNBkeJngXQ09yolEDHw,441
|
332
333
|
airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=zQPikLIt0yhP9EwZaPglRTIqFCauo4pSsJk_7kYq9Aw,1406
|
333
|
-
airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=
|
334
|
+
airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=EdPHmMcyZhRYtO2cniIQAQpzPfGCBpzmAJ3NTVS4qbo,1249
|
334
335
|
airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
335
336
|
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=CCxCbgvUugxiWpHX8-dkkJHWKDjL5iwiIbOUj8KIJ9c,7079
|
336
337
|
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=x8MLm1pTMfLNHvMF3P1ixYkYt_xjpbaIwnvhY_ofdBo,8076
|
@@ -349,7 +350,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
|
|
349
350
|
airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
|
350
351
|
airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
|
351
352
|
airbyte_cdk/sources/streams/http/http.py,sha256=0uariNq8OFnlX7iqOHwBhecxA-Hfd5hSY8_XCEgn3jI,28499
|
352
|
-
airbyte_cdk/sources/streams/http/http_client.py,sha256=
|
353
|
+
airbyte_cdk/sources/streams/http/http_client.py,sha256=FXOBMBocszQkbK1ENfcMBQoHtL1EUWA4KsI6oJE8Ni8,23071
|
353
354
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
354
355
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
355
356
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=0WfnxuxDwRYeq-PIwdUjJujDnxuJPhNfHlX_8aNHtYU,19663
|
@@ -364,7 +365,7 @@ airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABL
|
|
364
365
|
airbyte_cdk/sources/utils/files_directory.py,sha256=z8Dmr-wkL1sAqdwCST4MBUFAyMHPD2cJIzVdAuCynp8,391
|
365
366
|
airbyte_cdk/sources/utils/record_helper.py,sha256=7wL-pDYrBpcmZHa8ORtiSOqBZJEZI5hdl2dA1RYiatk,2029
|
366
367
|
airbyte_cdk/sources/utils/schema_helpers.py,sha256=bR3I70-e11S6B8r6VK-pthQXtcYrXojgXFvuK7lRrpg,8545
|
367
|
-
airbyte_cdk/sources/utils/slice_logger.py,sha256=
|
368
|
+
airbyte_cdk/sources/utils/slice_logger.py,sha256=M1TvcYGMftXR841XdJmeEpKpQqrdOD5X-qsspfAMKMs,2168
|
368
369
|
airbyte_cdk/sources/utils/transform.py,sha256=0LOvIJg1vmg_70AiAVe-YHMr-LHrqEuxg9cm1BnYPDM,11725
|
369
370
|
airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
|
370
371
|
airbyte_cdk/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -424,9 +425,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
424
425
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
425
426
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
426
427
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
427
|
-
airbyte_cdk-6.
|
428
|
-
airbyte_cdk-6.
|
429
|
-
airbyte_cdk-6.
|
430
|
-
airbyte_cdk-6.
|
431
|
-
airbyte_cdk-6.
|
432
|
-
airbyte_cdk-6.
|
428
|
+
airbyte_cdk-6.61.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
429
|
+
airbyte_cdk-6.61.1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
430
|
+
airbyte_cdk-6.61.1.dist-info/METADATA,sha256=ZaU6cQVOtVQiXUd8WXEI_dIFAN2Gwm6XP_FB-cTVKfM,6477
|
431
|
+
airbyte_cdk-6.61.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
432
|
+
airbyte_cdk-6.61.1.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
|
433
|
+
airbyte_cdk-6.61.1.dist-info/RECORD,,