airbyte-cdk 0.53.9__py3-none-any.whl → 0.55.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +16 -4
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +14 -14
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +2 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +40 -28
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +33 -0
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +9 -2
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,23 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
5
4
|
import logging
|
6
5
|
|
7
6
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
8
8
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
9
|
-
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
10
|
-
from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
|
11
9
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
|
12
10
|
from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import (
|
13
11
|
AlwaysAvailableAvailabilityStrategy,
|
14
12
|
ConcurrentSourceBuilder,
|
15
13
|
InMemoryPartition,
|
16
14
|
InMemoryPartitionGenerator,
|
17
|
-
NeverLogSliceLogger,
|
18
15
|
)
|
19
16
|
|
20
|
-
_id_only_stream = ThreadBasedConcurrentStream(
|
21
|
-
partition_generator=InMemoryPartitionGenerator(
|
22
|
-
|
17
|
+
_id_only_stream = DefaultStream(
|
18
|
+
partition_generator=InMemoryPartitionGenerator(
|
19
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
20
|
+
),
|
23
21
|
name="stream1",
|
24
22
|
json_schema={
|
25
23
|
"type": "object",
|
@@ -30,15 +28,13 @@ _id_only_stream = ThreadBasedConcurrentStream(
|
|
30
28
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
31
29
|
primary_key=[],
|
32
30
|
cursor_field=None,
|
33
|
-
slice_logger=NeverLogSliceLogger(),
|
34
31
|
logger=logging.getLogger("test_logger"),
|
35
|
-
message_repository=None,
|
36
|
-
timeout_seconds=300,
|
37
32
|
)
|
38
33
|
|
39
|
-
_id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
|
40
|
-
partition_generator=InMemoryPartitionGenerator(
|
41
|
-
|
34
|
+
_id_only_stream_with_slice_logger = DefaultStream(
|
35
|
+
partition_generator=InMemoryPartitionGenerator(
|
36
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
37
|
+
),
|
42
38
|
name="stream1",
|
43
39
|
json_schema={
|
44
40
|
"type": "object",
|
@@ -49,15 +45,13 @@ _id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
|
|
49
45
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
50
46
|
primary_key=[],
|
51
47
|
cursor_field=None,
|
52
|
-
slice_logger=AlwaysLogSliceLogger(),
|
53
48
|
logger=logging.getLogger("test_logger"),
|
54
|
-
message_repository=None,
|
55
|
-
timeout_seconds=300,
|
56
49
|
)
|
57
50
|
|
58
|
-
_id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
|
59
|
-
partition_generator=InMemoryPartitionGenerator(
|
60
|
-
|
51
|
+
_id_only_stream_with_primary_key = DefaultStream(
|
52
|
+
partition_generator=InMemoryPartitionGenerator(
|
53
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
|
54
|
+
),
|
61
55
|
name="stream1",
|
62
56
|
json_schema={
|
63
57
|
"type": "object",
|
@@ -68,20 +62,16 @@ _id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
|
|
68
62
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
69
63
|
primary_key=["id"],
|
70
64
|
cursor_field=None,
|
71
|
-
slice_logger=NeverLogSliceLogger(),
|
72
65
|
logger=logging.getLogger("test_logger"),
|
73
|
-
message_repository=None,
|
74
|
-
timeout_seconds=300,
|
75
66
|
)
|
76
67
|
|
77
|
-
_id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
|
68
|
+
_id_only_stream_multiple_partitions = DefaultStream(
|
78
69
|
partition_generator=InMemoryPartitionGenerator(
|
79
70
|
[
|
80
|
-
InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
|
81
|
-
InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
|
71
|
+
InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
|
72
|
+
InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
|
82
73
|
]
|
83
74
|
),
|
84
|
-
max_workers=1,
|
85
75
|
name="stream1",
|
86
76
|
json_schema={
|
87
77
|
"type": "object",
|
@@ -92,20 +82,16 @@ _id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
|
|
92
82
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
93
83
|
primary_key=[],
|
94
84
|
cursor_field=None,
|
95
|
-
slice_logger=NeverLogSliceLogger(),
|
96
85
|
logger=logging.getLogger("test_logger"),
|
97
|
-
message_repository=None,
|
98
|
-
timeout_seconds=300,
|
99
86
|
)
|
100
87
|
|
101
|
-
_id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream(
|
88
|
+
_id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
|
102
89
|
partition_generator=InMemoryPartitionGenerator(
|
103
90
|
[
|
104
|
-
InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
|
105
|
-
InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
|
91
|
+
InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
|
92
|
+
InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
|
106
93
|
]
|
107
94
|
),
|
108
|
-
max_workers=2,
|
109
95
|
name="stream1",
|
110
96
|
json_schema={
|
111
97
|
"type": "object",
|
@@ -116,17 +102,13 @@ _id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurren
|
|
116
102
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
117
103
|
primary_key=[],
|
118
104
|
cursor_field=None,
|
119
|
-
slice_logger=NeverLogSliceLogger(),
|
120
105
|
logger=logging.getLogger("test_logger"),
|
121
|
-
message_repository=None,
|
122
|
-
timeout_seconds=300,
|
123
106
|
)
|
124
107
|
|
125
|
-
_stream_raising_exception = ThreadBasedConcurrentStream(
|
108
|
+
_stream_raising_exception = DefaultStream(
|
126
109
|
partition_generator=InMemoryPartitionGenerator(
|
127
|
-
[InMemoryPartition("partition1", None, [Record({"id": "1"}), ValueError("test exception")])]
|
110
|
+
[InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), ValueError("test exception")])]
|
128
111
|
),
|
129
|
-
max_workers=1,
|
130
112
|
name="stream1",
|
131
113
|
json_schema={
|
132
114
|
"type": "object",
|
@@ -137,10 +119,7 @@ _stream_raising_exception = ThreadBasedConcurrentStream(
|
|
137
119
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
138
120
|
primary_key=[],
|
139
121
|
cursor_field=None,
|
140
|
-
slice_logger=NeverLogSliceLogger(),
|
141
122
|
logger=logging.getLogger("test_logger"),
|
142
|
-
message_repository=None,
|
143
|
-
timeout_seconds=300,
|
144
123
|
)
|
145
124
|
|
146
125
|
test_concurrent_cdk_single_stream = (
|
@@ -165,15 +144,14 @@ test_concurrent_cdk_single_stream = (
|
|
165
144
|
.set_expected_logs(
|
166
145
|
{
|
167
146
|
"read": [
|
168
|
-
{"level": "INFO", "message": "Starting syncing ConcurrentCdkSource"},
|
147
|
+
{"level": "INFO", "message": "Starting syncing"},
|
169
148
|
{"level": "INFO", "message": "Marking stream stream1 as STARTED"},
|
170
149
|
{"level": "INFO", "message": "Syncing stream: stream1"},
|
171
150
|
{"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
|
172
151
|
{"level": "INFO", "message": "Read 2 records from stream1 stream"},
|
173
152
|
{"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
|
174
153
|
{"level": "INFO", "message": "Finished syncing stream1"},
|
175
|
-
{"level": "INFO", "message": "
|
176
|
-
{"level": "INFO", "message": "Finished syncing ConcurrentCdkSource"},
|
154
|
+
{"level": "INFO", "message": "Finished syncing"},
|
177
155
|
]
|
178
156
|
}
|
179
157
|
)
|
@@ -202,11 +180,13 @@ test_concurrent_cdk_single_stream_with_primary_key = (
|
|
202
180
|
.set_name("test_concurrent_cdk_single_stream_with_primary_key")
|
203
181
|
.set_config({})
|
204
182
|
.set_source_builder(
|
205
|
-
ConcurrentSourceBuilder().set_streams(
|
183
|
+
ConcurrentSourceBuilder()
|
184
|
+
.set_streams(
|
206
185
|
[
|
207
186
|
_id_only_stream_with_primary_key,
|
208
187
|
]
|
209
188
|
)
|
189
|
+
.set_message_repository(InMemoryMessageRepository())
|
210
190
|
)
|
211
191
|
.set_expected_records(
|
212
192
|
[
|
@@ -239,14 +219,21 @@ test_concurrent_cdk_multiple_streams = (
|
|
239
219
|
.set_name("test_concurrent_cdk_multiple_streams")
|
240
220
|
.set_config({})
|
241
221
|
.set_source_builder(
|
242
|
-
ConcurrentSourceBuilder().set_streams(
|
222
|
+
ConcurrentSourceBuilder()
|
223
|
+
.set_streams(
|
243
224
|
[
|
244
225
|
_id_only_stream,
|
245
|
-
|
226
|
+
DefaultStream(
|
246
227
|
partition_generator=InMemoryPartitionGenerator(
|
247
|
-
[
|
228
|
+
[
|
229
|
+
InMemoryPartition(
|
230
|
+
"partition1",
|
231
|
+
"stream2",
|
232
|
+
None,
|
233
|
+
[Record({"id": "10", "key": "v1"}, "stream2"), Record({"id": "20", "key": "v2"}, "stream2")],
|
234
|
+
)
|
235
|
+
]
|
248
236
|
),
|
249
|
-
max_workers=1,
|
250
237
|
name="stream2",
|
251
238
|
json_schema={
|
252
239
|
"type": "object",
|
@@ -258,13 +245,11 @@ test_concurrent_cdk_multiple_streams = (
|
|
258
245
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
259
246
|
primary_key=[],
|
260
247
|
cursor_field=None,
|
261
|
-
slice_logger=NeverLogSliceLogger(),
|
262
248
|
logger=logging.getLogger("test_logger"),
|
263
|
-
message_repository=None,
|
264
|
-
timeout_seconds=300,
|
265
249
|
),
|
266
250
|
]
|
267
251
|
)
|
252
|
+
.set_message_repository(InMemoryMessageRepository())
|
268
253
|
)
|
269
254
|
.set_expected_records(
|
270
255
|
[
|
@@ -347,11 +332,13 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
|
|
347
332
|
.set_name("test_concurrent_cdk_single_stream_multiple_partitions")
|
348
333
|
.set_config({})
|
349
334
|
.set_source_builder(
|
350
|
-
ConcurrentSourceBuilder().set_streams(
|
335
|
+
ConcurrentSourceBuilder()
|
336
|
+
.set_streams(
|
351
337
|
[
|
352
338
|
_id_only_stream_multiple_partitions,
|
353
339
|
]
|
354
340
|
)
|
341
|
+
.set_message_repository(InMemoryMessageRepository())
|
355
342
|
)
|
356
343
|
.set_expected_records(
|
357
344
|
[
|
@@ -385,11 +372,13 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
|
|
385
372
|
.set_name("test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_2")
|
386
373
|
.set_config({})
|
387
374
|
.set_source_builder(
|
388
|
-
ConcurrentSourceBuilder().set_streams(
|
375
|
+
ConcurrentSourceBuilder()
|
376
|
+
.set_streams(
|
389
377
|
[
|
390
378
|
_id_only_stream_multiple_partitions_concurrency_level_two,
|
391
379
|
]
|
392
380
|
)
|
381
|
+
.set_message_repository(InMemoryMessageRepository())
|
393
382
|
)
|
394
383
|
.set_expected_records(
|
395
384
|
[
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
5
4
|
import json
|
6
5
|
import logging
|
7
6
|
from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
|
8
7
|
|
9
8
|
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
|
10
|
-
from airbyte_cdk.sources import AbstractSource
|
9
|
+
from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
|
10
|
+
from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
|
11
11
|
from airbyte_cdk.sources.message import MessageRepository
|
12
12
|
from airbyte_cdk.sources.streams import Stream
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
|
15
15
|
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
16
|
+
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
16
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
17
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
18
19
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
19
|
-
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
20
20
|
from airbyte_cdk.sources.streams.core import StreamData
|
21
21
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
22
22
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
@@ -37,17 +37,18 @@ class LegacyStream(Stream):
|
|
37
37
|
yield from []
|
38
38
|
|
39
39
|
|
40
|
-
class ConcurrentCdkSource(AbstractSource):
|
41
|
-
def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
|
40
|
+
class ConcurrentCdkSource(ConcurrentSourceAdapter):
|
41
|
+
def __init__(self, streams: List[DefaultStream], message_repository: Optional[MessageRepository], max_workers, timeout_in_seconds):
|
42
|
+
concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
|
43
|
+
super().__init__(concurrent_source)
|
42
44
|
self._streams = streams
|
43
|
-
self._message_repository = message_repository
|
44
45
|
|
45
46
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
46
47
|
# Check is not verified because it is up to the source to implement this method
|
47
48
|
return True, None
|
48
49
|
|
49
50
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
50
|
-
return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
|
51
|
+
return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]
|
51
52
|
|
52
53
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
53
54
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -56,7 +57,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
56
57
|
return ConfiguredAirbyteCatalog(
|
57
58
|
streams=[
|
58
59
|
ConfiguredAirbyteStream(
|
59
|
-
stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
|
60
|
+
stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
|
60
61
|
sync_mode=SyncMode.full_refresh,
|
61
62
|
destination_sync_mode=DestinationSyncMode.overwrite,
|
62
63
|
)
|
@@ -78,10 +79,15 @@ class InMemoryPartitionGenerator(PartitionGenerator):
|
|
78
79
|
|
79
80
|
|
80
81
|
class InMemoryPartition(Partition):
|
81
|
-
def __init__(self, name, _slice, records):
|
82
|
+
def stream_name(self) -> str:
|
83
|
+
return self._stream_name
|
84
|
+
|
85
|
+
def __init__(self, name, stream_name, _slice, records):
|
82
86
|
self._name = name
|
87
|
+
self._stream_name = stream_name
|
83
88
|
self._slice = _slice
|
84
89
|
self._records = records
|
90
|
+
self._is_closed = False
|
85
91
|
|
86
92
|
def read(self) -> Iterable[Record]:
|
87
93
|
for record_or_exception in self._records:
|
@@ -101,19 +107,22 @@ class InMemoryPartition(Partition):
|
|
101
107
|
else:
|
102
108
|
return hash(self._name)
|
103
109
|
|
110
|
+
def close(self) -> None:
|
111
|
+
self._is_closed = True
|
112
|
+
|
113
|
+
def is_closed(self) -> bool:
|
114
|
+
return self._is_closed
|
115
|
+
|
104
116
|
|
105
117
|
class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]):
|
106
118
|
def __init__(self):
|
107
|
-
self._streams: List[ThreadBasedConcurrentStream] = []
|
119
|
+
self._streams: List[DefaultStream] = []
|
108
120
|
self._message_repository = None
|
109
121
|
|
110
122
|
def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> ConcurrentCdkSource:
|
111
|
-
|
112
|
-
if not stream._message_repository:
|
113
|
-
stream._message_repository = self._message_repository
|
114
|
-
return ConcurrentCdkSource(self._streams, self._message_repository)
|
123
|
+
return ConcurrentCdkSource(self._streams, self._message_repository, 1, 1)
|
115
124
|
|
116
|
-
def set_streams(self, streams: List[ThreadBasedConcurrentStream]) -> "ConcurrentSourceBuilder":
|
125
|
+
def set_streams(self, streams: List[DefaultStream]) -> "ConcurrentSourceBuilder":
|
117
126
|
self._streams = streams
|
118
127
|
return self
|
119
128
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
4
|
+
import logging
|
5
5
|
import unittest
|
6
6
|
from unittest.mock import Mock
|
7
7
|
|
@@ -17,15 +17,18 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
|
|
17
17
|
StreamPartitionGenerator,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
|
20
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
21
21
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
22
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
23
|
from airbyte_cdk.sources.streams.core import Stream
|
24
|
+
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
24
25
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
25
26
|
|
26
27
|
_ANY_SYNC_MODE = SyncMode.full_refresh
|
27
28
|
_ANY_STATE = {"state_key": "state_value"}
|
28
29
|
_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
|
30
|
+
_STREAM_NAME = "stream"
|
31
|
+
_ANY_CURSOR = Mock(spec=Cursor)
|
29
32
|
|
30
33
|
|
31
34
|
@pytest.mark.parametrize(
|
@@ -77,7 +80,7 @@ def test_stream_partition_generator(sync_mode):
|
|
77
80
|
stream_slices = [{"slice": 1}, {"slice": 2}]
|
78
81
|
stream.stream_slices.return_value = stream_slices
|
79
82
|
|
80
|
-
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
83
|
+
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
81
84
|
|
82
85
|
partitions = list(partition_generator.generate())
|
83
86
|
slices = [partition.to_slice() for partition in partitions]
|
@@ -88,16 +91,21 @@ def test_stream_partition_generator(sync_mode):
|
|
88
91
|
@pytest.mark.parametrize(
|
89
92
|
"transformer, expected_records",
|
90
93
|
[
|
91
|
-
pytest.param(
|
94
|
+
pytest.param(
|
95
|
+
TypeTransformer(TransformConfig.NoTransform),
|
96
|
+
[Record({"data": "1"}, _STREAM_NAME), Record({"data": "2"}, _STREAM_NAME)],
|
97
|
+
id="test_no_transform",
|
98
|
+
),
|
92
99
|
pytest.param(
|
93
100
|
TypeTransformer(TransformConfig.DefaultSchemaNormalization),
|
94
|
-
[Record({"data": 1}), Record({"data": 2})],
|
101
|
+
[Record({"data": 1}, _STREAM_NAME), Record({"data": 2}, _STREAM_NAME)],
|
95
102
|
id="test_default_transform",
|
96
103
|
),
|
97
104
|
],
|
98
105
|
)
|
99
106
|
def test_stream_partition(transformer, expected_records):
|
100
107
|
stream = Mock()
|
108
|
+
stream.name = _STREAM_NAME
|
101
109
|
stream.get_json_schema.return_value = {"type": "object", "properties": {"data": {"type": ["integer"]}}}
|
102
110
|
stream.transformer = transformer
|
103
111
|
message_repository = InMemoryMessageRepository()
|
@@ -105,7 +113,7 @@ def test_stream_partition(transformer, expected_records):
|
|
105
113
|
sync_mode = SyncMode.full_refresh
|
106
114
|
cursor_field = None
|
107
115
|
state = None
|
108
|
-
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
|
116
|
+
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state, _ANY_CURSOR)
|
109
117
|
|
110
118
|
a_log_message = AirbyteMessage(
|
111
119
|
type=MessageType.LOG,
|
@@ -139,7 +147,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
139
147
|
message_repository = InMemoryMessageRepository()
|
140
148
|
_slice = None
|
141
149
|
|
142
|
-
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
150
|
+
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
143
151
|
|
144
152
|
stream.read_records.side_effect = Exception()
|
145
153
|
|
@@ -159,7 +167,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
159
167
|
def test_stream_partition_hash(_slice, expected_hash):
|
160
168
|
stream = Mock()
|
161
169
|
stream.name = "stream"
|
162
|
-
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
170
|
+
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
|
163
171
|
|
164
172
|
_hash = partition.__hash__()
|
165
173
|
assert _hash == expected_hash
|
@@ -176,10 +184,11 @@ class StreamFacadeTest(unittest.TestCase):
|
|
176
184
|
)
|
177
185
|
self._legacy_stream = Mock(spec=Stream)
|
178
186
|
self._cursor = Mock(spec=Cursor)
|
179
|
-
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
|
180
187
|
self._logger = Mock()
|
188
|
+
self._slice_logger = Mock()
|
189
|
+
self._slice_logger.should_log_slice_message.return_value = False
|
190
|
+
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor, self._slice_logger, self._logger)
|
181
191
|
self._source = Mock()
|
182
|
-
self._max_workers = 10
|
183
192
|
|
184
193
|
self._stream = Mock()
|
185
194
|
self._stream.primary_key = "id"
|
@@ -206,12 +215,16 @@ class StreamFacadeTest(unittest.TestCase):
|
|
206
215
|
|
207
216
|
def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
|
208
217
|
assert (
|
209
|
-
StreamFacade(
|
218
|
+
StreamFacade(
|
219
|
+
self._abstract_stream, self._legacy_stream, _ANY_CURSOR, Mock(spec=SliceLogger), Mock(spec=logging.Logger)
|
220
|
+
).supports_incremental
|
210
221
|
== self._legacy_stream.supports_incremental
|
211
222
|
)
|
212
223
|
|
213
224
|
def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
|
214
|
-
assert StreamFacade(
|
225
|
+
assert StreamFacade(
|
226
|
+
self._abstract_stream, self._legacy_stream, Mock(spec=Cursor), Mock(spec=SliceLogger), Mock(spec=logging.Logger)
|
227
|
+
).supports_incremental
|
215
228
|
|
216
229
|
def test_check_availability_is_delegated_to_wrapped_stream(self):
|
217
230
|
availability = StreamAvailable()
|
@@ -221,8 +234,11 @@ class StreamFacadeTest(unittest.TestCase):
|
|
221
234
|
|
222
235
|
def test_full_refresh(self):
|
223
236
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
224
|
-
records = [Record(data) for data in expected_stream_data]
|
225
|
-
|
237
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
238
|
+
|
239
|
+
partition = Mock()
|
240
|
+
partition.read.return_value = records
|
241
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
226
242
|
|
227
243
|
actual_stream_data = list(self._facade.read_records(SyncMode.full_refresh, None, None, None))
|
228
244
|
|
@@ -230,8 +246,10 @@ class StreamFacadeTest(unittest.TestCase):
|
|
230
246
|
|
231
247
|
def test_read_records_full_refresh(self):
|
232
248
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
233
|
-
records = [Record(data) for data in expected_stream_data]
|
234
|
-
|
249
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
250
|
+
partition = Mock()
|
251
|
+
partition.read.return_value = records
|
252
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
235
253
|
|
236
254
|
actual_stream_data = list(self._facade.read_full_refresh(None, None, None))
|
237
255
|
|
@@ -239,8 +257,10 @@ class StreamFacadeTest(unittest.TestCase):
|
|
239
257
|
|
240
258
|
def test_read_records_incremental(self):
|
241
259
|
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
-
records = [Record(data) for data in expected_stream_data]
|
243
|
-
|
260
|
+
records = [Record(data, "stream") for data in expected_stream_data]
|
261
|
+
partition = Mock()
|
262
|
+
partition.read.return_value = records
|
263
|
+
self._abstract_stream.generate_partitions.return_value = [partition]
|
244
264
|
|
245
265
|
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
246
266
|
|
@@ -252,7 +272,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
252
272
|
stream.primary_key = "id"
|
253
273
|
stream.cursor_field = "cursor"
|
254
274
|
|
255
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
275
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
256
276
|
|
257
277
|
assert facade.name == "stream"
|
258
278
|
assert facade.cursor_field == "cursor"
|
@@ -264,8 +284,8 @@ class StreamFacadeTest(unittest.TestCase):
|
|
264
284
|
stream.primary_key = None
|
265
285
|
stream.cursor_field = []
|
266
286
|
|
267
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
268
|
-
facade._abstract_stream._primary_key
|
287
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
288
|
+
assert facade._abstract_stream._primary_key == []
|
269
289
|
|
270
290
|
def test_create_from_stream_with_composite_primary_key(self):
|
271
291
|
stream = Mock()
|
@@ -273,15 +293,15 @@ class StreamFacadeTest(unittest.TestCase):
|
|
273
293
|
stream.primary_key = ["id", "name"]
|
274
294
|
stream.cursor_field = []
|
275
295
|
|
276
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
277
|
-
facade._abstract_stream._primary_key == ["id", "name"]
|
296
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
297
|
+
assert facade._abstract_stream._primary_key == ["id", "name"]
|
278
298
|
|
279
299
|
def test_create_from_stream_with_empty_list_cursor(self):
|
280
300
|
stream = Mock()
|
281
301
|
stream.primary_key = "id"
|
282
302
|
stream.cursor_field = []
|
283
303
|
|
284
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
304
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
285
305
|
|
286
306
|
assert facade.cursor_field == []
|
287
307
|
|
@@ -291,7 +311,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
291
311
|
stream.primary_key = [["field", "id"]]
|
292
312
|
|
293
313
|
with self.assertRaises(ValueError):
|
294
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
314
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
295
315
|
|
296
316
|
def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
|
297
317
|
stream = Mock()
|
@@ -299,7 +319,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
299
319
|
stream.primary_key = 123
|
300
320
|
|
301
321
|
with self.assertRaises(ValueError):
|
302
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
322
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
303
323
|
|
304
324
|
def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
|
305
325
|
stream = Mock()
|
@@ -308,7 +328,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
308
328
|
stream.cursor_field = ["field", "cursor"]
|
309
329
|
|
310
330
|
with self.assertRaises(ValueError):
|
311
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger,
|
331
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
312
332
|
|
313
333
|
def test_create_from_stream_with_cursor_field_as_list(self):
|
314
334
|
stream = Mock()
|
@@ -316,7 +336,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
316
336
|
stream.primary_key = "id"
|
317
337
|
stream.cursor_field = ["cursor"]
|
318
338
|
|
319
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger,
|
339
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
320
340
|
assert facade.cursor_field == "cursor"
|
321
341
|
|
322
342
|
def test_create_from_stream_none_message_repository(self):
|
@@ -326,12 +346,12 @@ class StreamFacadeTest(unittest.TestCase):
|
|
326
346
|
self._source.message_repository = None
|
327
347
|
|
328
348
|
with self.assertRaises(ValueError):
|
329
|
-
StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
349
|
+
StreamFacade.create_from_stream(self._stream, self._source, self._logger, {}, self._cursor)
|
330
350
|
|
331
351
|
def test_get_error_display_message_no_display_message(self):
|
332
352
|
self._stream.get_error_display_message.return_value = "display_message"
|
333
353
|
|
334
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
354
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
335
355
|
|
336
356
|
expected_display_message = None
|
337
357
|
e = Exception()
|
@@ -343,7 +363,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
343
363
|
def test_get_error_display_message_with_display_message(self):
|
344
364
|
self._stream.get_error_display_message.return_value = "display_message"
|
345
365
|
|
346
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger,
|
366
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
|
347
367
|
|
348
368
|
expected_display_message = "display_message"
|
349
369
|
e = ExceptionWithDisplayMessage("display_message")
|
@@ -364,7 +384,7 @@ def test_get_error_display_message(exception, expected_display_message):
|
|
364
384
|
stream = Mock()
|
365
385
|
legacy_stream = Mock()
|
366
386
|
cursor = Mock(spec=Cursor)
|
367
|
-
facade = StreamFacade(stream, legacy_stream, cursor)
|
387
|
+
facade = StreamFacade(stream, legacy_stream, cursor, Mock().Mock(), Mock())
|
368
388
|
|
369
389
|
display_message = facade.get_error_display_message(exception)
|
370
390
|
|
@@ -7,9 +7,9 @@ from unittest.mock import Mock
|
|
7
7
|
|
8
8
|
import pytest
|
9
9
|
from airbyte_cdk.models import SyncMode
|
10
|
+
from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
|
10
11
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamPartition
|
11
12
|
from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
|
12
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_GENERATED_SENTINEL
|
13
13
|
|
14
14
|
|
15
15
|
@pytest.mark.parametrize(
|
@@ -17,21 +17,22 @@ from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_G
|
|
17
17
|
)
|
18
18
|
def test_partition_generator(slices):
|
19
19
|
queue = Queue()
|
20
|
-
partition_generator = PartitionEnqueuer(queue, PARTITIONS_GENERATED_SENTINEL)
|
20
|
+
partition_generator = PartitionEnqueuer(queue)
|
21
21
|
|
22
22
|
stream = Mock()
|
23
23
|
message_repository = Mock()
|
24
24
|
sync_mode = SyncMode.full_refresh
|
25
25
|
cursor_field = None
|
26
26
|
state = None
|
27
|
-
|
28
|
-
|
27
|
+
cursor = Mock()
|
28
|
+
partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state, cursor) for s in slices]
|
29
|
+
stream.generate_partitions.return_value = iter(partitions)
|
29
30
|
|
30
31
|
partition_generator.generate_partitions(stream)
|
31
32
|
|
32
33
|
actual_partitions = []
|
33
34
|
while partition := queue.get(False):
|
34
|
-
if partition == PARTITIONS_GENERATED_SENTINEL:
|
35
|
+
if isinstance(partition, PartitionGenerationCompletedSentinel):
|
35
36
|
break
|
36
37
|
actual_partitions.append(partition)
|
37
38
|
|