airbyte-cdk 0.54.0__py3-none-any.whl → 0.55.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +35 -23
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
@@ -116,15 +116,14 @@ test_stream_facade_single_stream = (
|
|
116
116
|
.set_expected_logs(
|
117
117
|
{
|
118
118
|
"read": [
|
119
|
-
{"level": "INFO", "message": "Starting syncing StreamFacadeSource"},
|
119
|
+
{"level": "INFO", "message": "Starting syncing"},
|
120
120
|
{"level": "INFO", "message": "Marking stream stream1 as STARTED"},
|
121
121
|
{"level": "INFO", "message": "Syncing stream: stream1"},
|
122
122
|
{"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
|
123
123
|
{"level": "INFO", "message": "Read 2 records from stream1 stream"},
|
124
124
|
{"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
|
125
125
|
{"level": "INFO", "message": "Finished syncing stream1"},
|
126
|
-
{"level": "INFO", "message": "
|
127
|
-
{"level": "INFO", "message": "Finished syncing StreamFacadeSource"},
|
126
|
+
{"level": "INFO", "message": "Finished syncing"},
|
128
127
|
]
|
129
128
|
}
|
130
129
|
)
|
@@ -1,25 +1,23 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
5
4
|
import logging
|
6
5
|
|
7
6
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
8
8
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
9
|
-
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
10
|
-
from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
|
11
9
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
|
12
10
|
from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import (
|
13
11
|
AlwaysAvailableAvailabilityStrategy,
|
14
12
|
ConcurrentSourceBuilder,
|
15
13
|
InMemoryPartition,
|
16
14
|
InMemoryPartitionGenerator,
|
17
|
-
NeverLogSliceLogger,
|
18
15
|
)
|
19
16
|
|
20
|
-
_id_only_stream = ThreadBasedConcurrentStream(
|
21
|
-
partition_generator=InMemoryPartitionGenerator(
|
22
|
-
|
17
|
+
_id_only_stream = DefaultStream(
|
18
|
+
partition_generator=InMemoryPartitionGenerator(
|
19
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
20
|
+
),
|
23
21
|
name="stream1",
|
24
22
|
json_schema={
|
25
23
|
"type": "object",
|
@@ -30,15 +28,13 @@ _id_only_stream = ThreadBasedConcurrentStream(
|
|
30
28
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
31
29
|
primary_key=[],
|
32
30
|
cursor_field=None,
|
33
|
-
slice_logger=NeverLogSliceLogger(),
|
34
31
|
logger=logging.getLogger("test_logger"),
|
35
|
-
message_repository=None,
|
36
|
-
timeout_seconds=300,
|
37
32
|
)
|
38
33
|
|
39
|
-
_id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
|
40
|
-
partition_generator=InMemoryPartitionGenerator(
|
41
|
-
|
34
|
+
_id_only_stream_with_slice_logger = DefaultStream(
|
35
|
+
partition_generator=InMemoryPartitionGenerator(
|
36
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
37
|
+
),
|
42
38
|
name="stream1",
|
43
39
|
json_schema={
|
44
40
|
"type": "object",
|
@@ -49,15 +45,13 @@ _id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
|
|
49
45
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
50
46
|
primary_key=[],
|
51
47
|
cursor_field=None,
|
52
|
-
slice_logger=AlwaysLogSliceLogger(),
|
53
48
|
logger=logging.getLogger("test_logger"),
|
54
|
-
message_repository=None,
|
55
|
-
timeout_seconds=300,
|
56
49
|
)
|
57
50
|
|
58
|
-
_id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
|
59
|
-
partition_generator=InMemoryPartitionGenerator(
|
60
|
-
|
51
|
+
_id_only_stream_with_primary_key = DefaultStream(
|
52
|
+
partition_generator=InMemoryPartitionGenerator(
|
53
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
54
|
+
),
|
61
55
|
name="stream1",
|
62
56
|
json_schema={
|
63
57
|
"type": "object",
|
@@ -68,20 +62,16 @@ _id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
|
|
68
62
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
69
63
|
primary_key=["id"],
|
70
64
|
cursor_field=None,
|
71
|
-
slice_logger=NeverLogSliceLogger(),
|
72
65
|
logger=logging.getLogger("test_logger"),
|
73
|
-
message_repository=None,
|
74
|
-
timeout_seconds=300,
|
75
66
|
)
|
76
67
|
|
77
|
-
_id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
|
68
|
+
_id_only_stream_multiple_partitions = DefaultStream(
|
78
69
|
partition_generator=InMemoryPartitionGenerator(
|
79
70
|
[
|
80
|
-
InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
|
81
|
-
InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
|
71
|
+
InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
|
72
|
+
InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
|
82
73
|
]
|
83
74
|
),
|
84
|
-
max_workers=1,
|
85
75
|
name="stream1",
|
86
76
|
json_schema={
|
87
77
|
"type": "object",
|
@@ -92,20 +82,16 @@ _id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
|
|
92
82
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
93
83
|
primary_key=[],
|
94
84
|
cursor_field=None,
|
95
|
-
slice_logger=NeverLogSliceLogger(),
|
96
85
|
logger=logging.getLogger("test_logger"),
|
97
|
-
message_repository=None,
|
98
|
-
timeout_seconds=300,
|
99
86
|
)
|
100
87
|
|
101
|
-
_id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream(
|
88
|
+
_id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
|
102
89
|
partition_generator=InMemoryPartitionGenerator(
|
103
90
|
[
|
104
|
-
InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
|
105
|
-
InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
|
91
|
+
InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
|
92
|
+
InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
|
106
93
|
]
|
107
94
|
),
|
108
|
-
max_workers=2,
|
109
95
|
name="stream1",
|
110
96
|
json_schema={
|
111
97
|
"type": "object",
|
@@ -116,17 +102,13 @@ _id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurren
|
|
116
102
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
117
103
|
primary_key=[],
|
118
104
|
cursor_field=None,
|
119
|
-
slice_logger=NeverLogSliceLogger(),
|
120
105
|
logger=logging.getLogger("test_logger"),
|
121
|
-
message_repository=None,
|
122
|
-
timeout_seconds=300,
|
123
106
|
)
|
124
107
|
|
125
|
-
_stream_raising_exception = ThreadBasedConcurrentStream(
|
108
|
+
_stream_raising_exception = DefaultStream(
|
126
109
|
partition_generator=InMemoryPartitionGenerator(
|
127
|
-
[InMemoryPartition("partition1", None, [Record({"id": "1"}), ValueError("test exception")])]
|
110
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), ValueError("test exception")])]
|
128
111
|
),
|
129
|
-
max_workers=1,
|
130
112
|
name="stream1",
|
131
113
|
json_schema={
|
132
114
|
"type": "object",
|
@@ -137,10 +119,7 @@ _stream_raising_exception = ThreadBasedConcurrentStream(
|
|
137
119
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
138
120
|
primary_key=[],
|
139
121
|
cursor_field=None,
|
140
|
-
slice_logger=NeverLogSliceLogger(),
|
141
122
|
logger=logging.getLogger("test_logger"),
|
142
|
-
message_repository=None,
|
143
|
-
timeout_seconds=300,
|
144
123
|
)
|
145
124
|
|
146
125
|
test_concurrent_cdk_single_stream = (
|
@@ -165,15 +144,14 @@ test_concurrent_cdk_single_stream = (
|
|
165
144
|
.set_expected_logs(
|
166
145
|
{
|
167
146
|
"read": [
|
168
|
-
{"level": "INFO", "message": "Starting syncing ConcurrentCdkSource"},
|
147
|
+
{"level": "INFO", "message": "Starting syncing"},
|
169
148
|
{"level": "INFO", "message": "Marking stream stream1 as STARTED"},
|
170
149
|
{"level": "INFO", "message": "Syncing stream: stream1"},
|
171
150
|
{"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
|
172
151
|
{"level": "INFO", "message": "Read 2 records from stream1 stream"},
|
173
152
|
{"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
|
174
153
|
{"level": "INFO", "message": "Finished syncing stream1"},
|
175
|
-
{"level": "INFO", "message": "
|
176
|
-
{"level": "INFO", "message": "Finished syncing ConcurrentCdkSource"},
|
154
|
+
{"level": "INFO", "message": "Finished syncing"},
|
177
155
|
]
|
178
156
|
}
|
179
157
|
)
|
@@ -202,11 +180,13 @@ test_concurrent_cdk_single_stream_with_primary_key = (
|
|
202
180
|
.set_name("test_concurrent_cdk_single_stream_with_primary_key")
|
203
181
|
.set_config({})
|
204
182
|
.set_source_builder(
|
205
|
-
ConcurrentSourceBuilder()
|
183
|
+
ConcurrentSourceBuilder()
|
184
|
+
.set_streams(
|
206
185
|
[
|
207
186
|
_id_only_stream_with_primary_key,
|
208
187
|
]
|
209
188
|
)
|
189
|
+
.set_message_repository(InMemoryMessageRepository())
|
210
190
|
)
|
211
191
|
.set_expected_records(
|
212
192
|
[
|
@@ -239,14 +219,21 @@ test_concurrent_cdk_multiple_streams = (
|
|
239
219
|
.set_name("test_concurrent_cdk_multiple_streams")
|
240
220
|
.set_config({})
|
241
221
|
.set_source_builder(
|
242
|
-
ConcurrentSourceBuilder()
|
222
|
+
ConcurrentSourceBuilder()
|
223
|
+
.set_streams(
|
243
224
|
[
|
244
225
|
_id_only_stream,
|
245
|
-
|
226
|
+
DefaultStream(
|
246
227
|
partition_generator=InMemoryPartitionGenerator(
|
247
|
-
[
|
228
|
+
[
|
229
|
+
InMemoryPartition(
|
230
|
+
"partition1",
|
231
|
+
"stream2",
|
232
|
+
None,
|
233
|
+
[Record({"id": "10", "key": "v1"}, "stream2"), Record({"id": "20", "key": "v2"}, "stream2")],
|
234
|
+
)
|
235
|
+
]
|
248
236
|
),
|
249
|
-
max_workers=1,
|
250
237
|
name="stream2",
|
251
238
|
json_schema={
|
252
239
|
"type": "object",
|
@@ -258,13 +245,11 @@ test_concurrent_cdk_multiple_streams = (
|
|
258
245
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
259
246
|
primary_key=[],
|
260
247
|
cursor_field=None,
|
261
|
-
slice_logger=NeverLogSliceLogger(),
|
262
248
|
logger=logging.getLogger("test_logger"),
|
263
|
-
message_repository=None,
|
264
|
-
timeout_seconds=300,
|
265
249
|
),
|
266
250
|
]
|
267
251
|
)
|
252
|
+
.set_message_repository(InMemoryMessageRepository())
|
268
253
|
)
|
269
254
|
.set_expected_records(
|
270
255
|
[
|
@@ -347,11 +332,13 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
|
|
347
332
|
.set_name("test_concurrent_cdk_single_stream_multiple_partitions")
|
348
333
|
.set_config({})
|
349
334
|
.set_source_builder(
|
350
|
-
ConcurrentSourceBuilder()
|
335
|
+
ConcurrentSourceBuilder()
|
336
|
+
.set_streams(
|
351
337
|
[
|
352
338
|
_id_only_stream_multiple_partitions,
|
353
339
|
]
|
354
340
|
)
|
341
|
+
.set_message_repository(InMemoryMessageRepository())
|
355
342
|
)
|
356
343
|
.set_expected_records(
|
357
344
|
[
|
@@ -385,11 +372,13 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
|
|
385
372
|
.set_name("test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_2")
|
386
373
|
.set_config({})
|
387
374
|
.set_source_builder(
|
388
|
-
ConcurrentSourceBuilder()
|
375
|
+
ConcurrentSourceBuilder()
|
376
|
+
.set_streams(
|
389
377
|
[
|
390
378
|
_id_only_stream_multiple_partitions_concurrency_level_two,
|
391
379
|
]
|
392
380
|
)
|
381
|
+
.set_message_repository(InMemoryMessageRepository())
|
393
382
|
)
|
394
383
|
.set_expected_records(
|
395
384
|
[
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
5
4
|
import json
|
6
5
|
import logging
|
7
6
|
from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
|
8
7
|
|
9
8
|
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
|
10
|
-
from airbyte_cdk.sources import AbstractSource
|
9
|
+
from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
|
10
|
+
from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
|
11
11
|
from airbyte_cdk.sources.message import MessageRepository
|
12
12
|
from airbyte_cdk.sources.streams import Stream
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
|
15
15
|
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
16
|
+
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
16
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
17
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
18
19
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
19
|
-
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
20
20
|
from airbyte_cdk.sources.streams.core import StreamData
|
21
21
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
22
22
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
@@ -37,17 +37,18 @@ class LegacyStream(Stream):
|
|
37
37
|
yield from []
|
38
38
|
|
39
39
|
|
40
|
-
class ConcurrentCdkSource(AbstractSource):
|
41
|
-
def __init__(self, streams: List[
|
40
|
+
class ConcurrentCdkSource(ConcurrentSourceAdapter):
|
41
|
+
def __init__(self, streams: List[DefaultStream], message_repository: Optional[MessageRepository], max_workers, timeout_in_seconds):
|
42
|
+
concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
|
43
|
+
super().__init__(concurrent_source)
|
42
44
|
self._streams = streams
|
43
|
-
self._message_repository = message_repository
|
44
45
|
|
45
46
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
46
47
|
# Check is not verified because it is up to the source to implement this method
|
47
48
|
return True, None
|
48
49
|
|
49
50
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
50
|
-
return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
|
51
|
+
return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]
|
51
52
|
|
52
53
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
53
54
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -56,7 +57,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
56
57
|
return ConfiguredAirbyteCatalog(
|
57
58
|
streams=[
|
58
59
|
ConfiguredAirbyteStream(
|
59
|
-
stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
|
60
|
+
stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
|
60
61
|
sync_mode=SyncMode.full_refresh,
|
61
62
|
destination_sync_mode=DestinationSyncMode.overwrite,
|
62
63
|
)
|
@@ -78,10 +79,15 @@ class InMemoryPartitionGenerator(PartitionGenerator):
|
|
78
79
|
|
79
80
|
|
80
81
|
class InMemoryPartition(Partition):
|
81
|
-
def __init__(self, name, _slice, records):
|
82
|
+
def stream_name(self) -> str:
|
83
|
+
return self._stream_name
|
84
|
+
|
85
|
+
def __init__(self, name, stream_name, _slice, records):
|
82
86
|
self._name = name
|
87
|
+
self._stream_name = stream_name
|
83
88
|
self._slice = _slice
|
84
89
|
self._records = records
|
90
|
+
self._is_closed = False
|
85
91
|
|
86
92
|
def read(self) -> Iterable[Record]:
|
87
93
|
for record_or_exception in self._records:
|
@@ -101,19 +107,22 @@ class InMemoryPartition(Partition):
|
|
101
107
|
else:
|
102
108
|
return hash(self._name)
|
103
109
|
|
110
|
+
def close(self) -> None:
|
111
|
+
self._is_closed = True
|
112
|
+
|
113
|
+
def is_closed(self) -> bool:
|
114
|
+
return self._is_closed
|
115
|
+
|
104
116
|
|
105
117
|
class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]):
|
106
118
|
def __init__(self):
|
107
|
-
self._streams: List[
|
119
|
+
self._streams: List[DefaultStream] = []
|
108
120
|
self._message_repository = None
|
109
121
|
|
110
122
|
def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> ConcurrentCdkSource:
|
111
|
-
|
112
|
-
if not stream._message_repository:
|
113
|
-
stream._message_repository = self._message_repository
|
114
|
-
return ConcurrentCdkSource(self._streams, self._message_repository)
|
123
|
+
return ConcurrentCdkSource(self._streams, self._message_repository, 1, 1)
|
115
124
|
|
116
|
-
def set_streams(self, streams: List[
|
125
|
+
def set_streams(self, streams: List[DefaultStream]) -> "ConcurrentSourceBuilder":
|
117
126
|
self._streams = streams
|
118
127
|
return self
|
119
128
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
4
|
+
import logging
|
5
5
|
import unittest
|
6
6
|
from unittest.mock import Mock
|
7
7
|
|
@@ -17,15 +17,18 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
|
|
17
17
|
StreamPartitionGenerator,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
|
20
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
21
21
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
22
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
23
|
from airbyte_cdk.sources.streams.core import Stream
|
24
|
+
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
24
25
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
25
26
|
|
26
27
|
_ANY_SYNC_MODE = SyncMode.full_refresh
|
27
28
|
_ANY_STATE = {"state_key": "state_value"}
|
28
29
|
_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
|
30
|
+
_STREAM_NAME = "stream"
|
31
|
+
_ANY_CURSOR = Mock(spec=Cursor)
|
29
32
|
|
30
33
|
|
31
34
|
@pytest.mark.parametrize(
|
@@ -77,7 +80,7 @@ def test_stream_partition_generator(sync_mode):
|
|
77
80
|
stream_slices = [{"slice": 1}, {"slice": 2}]
|
78
81
|
stream.stream_slices.return_value = stream_slices
|
79
82
|
|
80
|
-
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
83
|
+
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
81
84
|
|
82
85
|
partitions = list(partition_generator.generate())
|
83
86
|
slices = [partition.to_slice() for partition in partitions]
|
@@ -88,16 +91,21 @@ def test_stream_partition_generator(sync_mode):
|
|
88
91
|
@pytest.mark.parametrize(
|
89
92
|
"transformer, expected_records",
|
90
93
|
[
|
91
|
-
pytest.param(
|
94
|
+
pytest.param(
|
95
|
+
TypeTransformer(TransformConfig.NoTransform),
|
96
|
+
[Record({"data": "1"}, _STREAM_NAME), Record({"data": "2"}, _STREAM_NAME)],
|
97
|
+
id="test_no_transform",
|
98
|
+
),
|
92
99
|
pytest.param(
|
93
100
|
TypeTransformer(TransformConfig.DefaultSchemaNormalization),
|
94
|
-
[Record({"data": 1}), Record({"data": 2})],
|
101
|
+
[Record({"data": 1}, _STREAM_NAME), Record({"data": 2}, _STREAM_NAME)],
|
95
102
|
id="test_default_transform",
|
96
103
|
),
|
97
104
|
],
|
98
105
|
)
|
99
106
|
def test_stream_partition(transformer, expected_records):
|
100
107
|
stream = Mock()
|
108
|
+
stream.name = _STREAM_NAME
|
101
109
|
stream.get_json_schema.return_value = {"type": "object", "properties": {"data": {"type": ["integer"]}}}
|
102
110
|
stream.transformer = transformer
|
103
111
|
message_repository = InMemoryMessageRepository()
|
@@ -105,7 +113,7 @@ def test_stream_partition(transformer, expected_records):
|
|
105
113
|
sync_mode = SyncMode.full_refresh
|
106
114
|
cursor_field = None
|
107
115
|
state = None
|
108
|
-
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
|
116
|
+
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state, _ANY_CURSOR)
|
109
117
|
|
110
118
|
a_log_message = AirbyteMessage(
|
111
119
|
type=MessageType.LOG,
|
@@ -139,7 +147,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
139
147
|
message_repository = InMemoryMessageRepository()
|
140
148
|
_slice = None
|
141
149
|
|
142
|
-
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
150
|
+
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
143
151
|
|
144
152
|
stream.read_records.side_effect = Exception()
|
145
153
|
|
@@ -159,7 +167,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
159
167
|
def test_stream_partition_hash(_slice, expected_hash):
|
160
168
|
stream = Mock()
|
161
169
|
stream.name = "stream"
|
162
|
-
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
170
|
+
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
163
171
|
|
164
172
|
_hash = partition.__hash__()
|
165
173
|
assert _hash == expected_hash
|
@@ -176,10 +184,11 @@ class StreamFacadeTest(unittest.TestCase):
|
|
176
184
|
)
|
177
185
|
self._legacy_stream = Mock(spec=Stream)
|
178
186
|
self._cursor = Mock(spec=Cursor)
|
179
|
-
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
|
180
187
|
self._logger = Mock()
|
188
|
+
self._slice_logger = Mock()
|
189
|
+
self._slice_logger.should_log_slice_message.return_value = False
|
190
|
+
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor, self._slice_logger, self._logger)
|
181
191
|
self._source = Mock()
|
182
|
-
self._max_workers = 10
|
183
192
|
|
184
193
|
self._stream = Mock()
|
185
194
|
self._stream.primary_key = "id"
|
@@ -206,12 +215,16 @@ class StreamFacadeTest(unittest.TestCase):
|
|
206
215
|
|
207
216
|
def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
|
208
217
|
assert (
|
209
|
-
StreamFacade(
|
218
|
+
StreamFacade(
|
219
|
+
self._abstract_stream, self._legacy_stream, _ANY_CURSOR, Mock(spec=SliceLogger), Mock(spec=logging.Logger)
|
220
|
+
).supports_incremental
|
210
221
|
== self._legacy_stream.supports_incremental
|
211
222
|
)
|
212
223
|
|
213
224
|
def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
|
214
|
-
assert StreamFacade(
|
225
|
+
assert StreamFacade(
|
226
|
+
self._abstract_stream, self._legacy_stream, Mock(spec=Cursor), Mock(spec=SliceLogger), Mock(spec=logging.Logger)
|
227
|
+
).supports_incremental
|
215
228
|
|
216
229
|
def test_check_availability_is_delegated_to_wrapped_stream(self):
|
217
230
|
availability = StreamAvailable()
|
@@ -221,8 +234,11 @@ class StreamFacadeTest(unittest.TestCase):
|
|
221
234
|
|
222
235
|
def test_full_refresh(self):
|
223
236
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
224
|
-
records = [Record(data) for data in expected_stream_data]
|
225
|
-
|
237
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
238
|
+
|
239
|
+
partition = Mock()
|
240
|
+
partition.read.return_value = records
|
241
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
226
242
|
|
227
243
|
actual_stream_data = list(self._facade.read_records(SyncMode.full_refresh, None, None, None))
|
228
244
|
|
@@ -230,8 +246,10 @@ class StreamFacadeTest(unittest.TestCase):
|
|
230
246
|
|
231
247
|
def test_read_records_full_refresh(self):
|
232
248
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
233
|
-
records = [Record(data) for data in expected_stream_data]
|
234
|
-
|
249
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
250
|
+
partition = Mock()
|
251
|
+
partition.read.return_value = records
|
252
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
235
253
|
|
236
254
|
actual_stream_data = list(self._facade.read_full_refresh(None, None, None))
|
237
255
|
|
@@ -239,8 +257,10 @@ class StreamFacadeTest(unittest.TestCase):
|
|
239
257
|
|
240
258
|
def test_read_records_incremental(self):
|
241
259
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
-
records = [Record(data) for data in expected_stream_data]
|
243
|
-
|
260
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
261
|
+
partition = Mock()
|
262
|
+
partition.read.return_value = records
|
263
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
244
264
|
|
245
265
|
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
246
266
|
|
@@ -252,7 +272,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
252
272
|
stream.primary_key = "id"
|
253
273
|
stream.cursor_field = "cursor"
|
254
274
|
|
255
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
275
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
256
276
|
|
257
277
|
assert facade.name == "stream"
|
258
278
|
assert facade.cursor_field == "cursor"
|
@@ -264,8 +284,8 @@ class StreamFacadeTest(unittest.TestCase):
|
|
264
284
|
stream.primary_key = None
|
265
285
|
stream.cursor_field = []
|
266
286
|
|
267
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
268
|
-
facade._abstract_stream._primary_key
|
287
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
288
|
+
assert facade._abstract_stream._primary_key == []
|
269
289
|
|
270
290
|
def test_create_from_stream_with_composite_primary_key(self):
|
271
291
|
stream = Mock()
|
@@ -273,15 +293,15 @@ class StreamFacadeTest(unittest.TestCase):
|
|
273
293
|
stream.primary_key = ["id", "name"]
|
274
294
|
stream.cursor_field = []
|
275
295
|
|
276
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
277
|
-
facade._abstract_stream._primary_key == ["id", "name"]
|
296
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
297
|
+
assert facade._abstract_stream._primary_key == ["id", "name"]
|
278
298
|
|
279
299
|
def test_create_from_stream_with_empty_list_cursor(self):
|
280
300
|
stream = Mock()
|
281
301
|
stream.primary_key = "id"
|
282
302
|
stream.cursor_field = []
|
283
303
|
|
284
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
304
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
285
305
|
|
286
306
|
assert facade.cursor_field == []
|
287
307
|
|
@@ -291,7 +311,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
291
311
|
stream.primary_key = [["field", "id"]]
|
292
312
|
|
293
313
|
with self.assertRaises(ValueError):
|
294
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
314
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
295
315
|
|
296
316
|
def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
|
297
317
|
stream = Mock()
|
@@ -299,7 +319,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
299
319
|
stream.primary_key = 123
|
300
320
|
|
301
321
|
with self.assertRaises(ValueError):
|
302
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
322
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
303
323
|
|
304
324
|
def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
|
305
325
|
stream = Mock()
|
@@ -308,7 +328,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
308
328
|
stream.cursor_field = ["field", "cursor"]
|
309
329
|
|
310
330
|
with self.assertRaises(ValueError):
|
311
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
331
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
312
332
|
|
313
333
|
def test_create_from_stream_with_cursor_field_as_list(self):
|
314
334
|
stream = Mock()
|
@@ -316,7 +336,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
316
336
|
stream.primary_key = "id"
|
317
337
|
stream.cursor_field = ["cursor"]
|
318
338
|
|
319
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
339
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
320
340
|
assert facade.cursor_field == "cursor"
|
321
341
|
|
322
342
|
def test_create_from_stream_none_message_repository(self):
|
@@ -326,12 +346,12 @@ class StreamFacadeTest(unittest.TestCase):
|
|
326
346
|
self._source.message_repository = None
|
327
347
|
|
328
348
|
with self.assertRaises(ValueError):
|
329
|
-
StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
349
|
+
StreamFacade.create_from_stream(self._stream, self._source, self._logger, {}, self._cursor)
|
330
350
|
|
331
351
|
def test_get_error_display_message_no_display_message(self):
|
332
352
|
self._stream.get_error_display_message.return_value = "display_message"
|
333
353
|
|
334
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
354
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
335
355
|
|
336
356
|
expected_display_message = None
|
337
357
|
e = Exception()
|
@@ -343,7 +363,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
343
363
|
def test_get_error_display_message_with_display_message(self):
|
344
364
|
self._stream.get_error_display_message.return_value = "display_message"
|
345
365
|
|
346
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
366
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
347
367
|
|
348
368
|
expected_display_message = "display_message"
|
349
369
|
e = ExceptionWithDisplayMessage("display_message")
|
@@ -364,7 +384,7 @@ def test_get_error_display_message(exception, expected_display_message):
|
|
364
384
|
stream = Mock()
|
365
385
|
legacy_stream = Mock()
|
366
386
|
cursor = Mock(spec=Cursor)
|
367
|
-
facade = StreamFacade(stream, legacy_stream, cursor)
|
387
|
+
facade = StreamFacade(stream, legacy_stream, cursor, Mock().Mock(), Mock())
|
368
388
|
|
369
389
|
display_message = facade.get_error_display_message(exception)
|
370
390
|
|