airbyte-cdk 0.52.7__py3-none-any.whl → 0.52.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/destinations/vector_db_based/config.py +1 -0
- airbyte_cdk/sources/abstract_source.py +12 -61
- airbyte_cdk/sources/message/repository.py +0 -6
- airbyte_cdk/sources/source.py +14 -13
- airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
- airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
- airbyte_cdk/sources/streams/core.py +71 -1
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
- unit_tests/sources/message/test_repository.py +7 -20
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
- unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
- unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
- unit_tests/sources/streams/test_stream_read.py +3 -1
- unit_tests/sources/test_abstract_source.py +12 -9
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
|
|
11
11
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
|
12
12
|
from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
|
13
13
|
from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
|
14
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
15
|
+
test_incremental_stream_with_slice_boundaries,
|
16
|
+
test_incremental_stream_without_slice_boundaries,
|
14
17
|
test_stream_facade_multiple_streams,
|
15
18
|
test_stream_facade_raises_exception,
|
16
19
|
test_stream_facade_single_stream,
|
@@ -43,6 +46,9 @@ scenarios = [
|
|
43
46
|
test_stream_facade_single_stream_with_multiple_slices,
|
44
47
|
test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
|
45
48
|
test_stream_facade_raises_exception,
|
49
|
+
test_incremental_stream_with_slice_boundaries,
|
50
|
+
test_incremental_stream_without_slice_boundaries,
|
51
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
46
52
|
]
|
47
53
|
|
48
54
|
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
|
|
12
12
|
from airbyte_cdk.sources.streams import Stream
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
15
16
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
16
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
17
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
18
19
|
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
20
|
+
from airbyte_cdk.sources.streams.core import StreamData
|
19
21
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
20
22
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
21
23
|
from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
|
22
24
|
|
23
25
|
|
26
|
+
class LegacyStream(Stream):
|
27
|
+
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
28
|
+
return None
|
29
|
+
|
30
|
+
def read_records(
|
31
|
+
self,
|
32
|
+
sync_mode: SyncMode,
|
33
|
+
cursor_field: Optional[List[str]] = None,
|
34
|
+
stream_slice: Optional[Mapping[str, Any]] = None,
|
35
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
36
|
+
) -> Iterable[StreamData]:
|
37
|
+
yield from []
|
38
|
+
|
39
|
+
|
24
40
|
class ConcurrentCdkSource(AbstractSource):
|
25
41
|
def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
|
26
42
|
self._streams = streams
|
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
31
47
|
return True, None
|
32
48
|
|
33
49
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
34
|
-
return [StreamFacade(s) for s in self._streams]
|
50
|
+
return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
|
35
51
|
|
36
52
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
37
53
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
40
56
|
return ConfiguredAirbyteCatalog(
|
41
57
|
streams=[
|
42
58
|
ConfiguredAirbyteStream(
|
43
|
-
stream=StreamFacade(s).as_airbyte_stream(),
|
59
|
+
stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
|
44
60
|
sync_mode=SyncMode.full_refresh,
|
45
61
|
destination_sync_mode=DestinationSyncMode.overwrite,
|
46
62
|
)
|
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
|
|
57
73
|
def __init__(self, partitions: List[Partition]):
|
58
74
|
self._partitions = partitions
|
59
75
|
|
60
|
-
def generate(self
|
76
|
+
def generate(self) -> Iterable[Partition]:
|
61
77
|
yield from self._partitions
|
62
78
|
|
63
79
|
|
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
|
|
17
17
|
StreamPartitionGenerator,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
|
20
21
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
21
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
|
+
from airbyte_cdk.sources.streams.core import Stream
|
22
24
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
23
25
|
|
26
|
+
_ANY_SYNC_MODE = SyncMode.full_refresh
|
27
|
+
_ANY_STATE = {"state_key": "state_value"}
|
28
|
+
_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
|
29
|
+
|
24
30
|
|
25
31
|
@pytest.mark.parametrize(
|
26
32
|
"stream_availability, expected_available, expected_message",
|
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
|
|
71
77
|
stream_slices = [{"slice": 1}, {"slice": 2}]
|
72
78
|
stream.stream_slices.return_value = stream_slices
|
73
79
|
|
74
|
-
partition_generator = StreamPartitionGenerator(stream, message_repository)
|
80
|
+
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
75
81
|
|
76
|
-
partitions = list(partition_generator.generate(
|
82
|
+
partitions = list(partition_generator.generate())
|
77
83
|
slices = [partition.to_slice() for partition in partitions]
|
78
84
|
assert slices == stream_slices
|
85
|
+
stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
|
79
86
|
|
80
87
|
|
81
88
|
@pytest.mark.parametrize(
|
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
|
|
95
102
|
stream.transformer = transformer
|
96
103
|
message_repository = InMemoryMessageRepository()
|
97
104
|
_slice = None
|
98
|
-
|
105
|
+
sync_mode = SyncMode.full_refresh
|
106
|
+
cursor_field = None
|
107
|
+
state = None
|
108
|
+
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
|
99
109
|
|
100
110
|
a_log_message = AirbyteMessage(
|
101
111
|
type=MessageType.LOG,
|
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
128
138
|
|
129
139
|
message_repository = InMemoryMessageRepository()
|
130
140
|
_slice = None
|
131
|
-
|
141
|
+
|
142
|
+
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
132
143
|
|
133
144
|
stream.read_records.side_effect = Exception()
|
134
145
|
|
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
148
159
|
def test_stream_partition_hash(_slice, expected_hash):
|
149
160
|
stream = Mock()
|
150
161
|
stream.name = "stream"
|
151
|
-
partition = StreamPartition(stream, _slice, Mock())
|
162
|
+
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
152
163
|
|
153
164
|
_hash = partition.__hash__()
|
154
165
|
assert _hash == expected_hash
|
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
163
174
|
json_schema={"type": "object"},
|
164
175
|
supported_sync_modes=[SyncMode.full_refresh],
|
165
176
|
)
|
166
|
-
self.
|
177
|
+
self._legacy_stream = Mock(spec=Stream)
|
178
|
+
self._cursor = Mock(spec=Cursor)
|
179
|
+
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
|
167
180
|
self._logger = Mock()
|
168
181
|
self._source = Mock()
|
169
182
|
self._max_workers = 10
|
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
|
|
191
204
|
assert self._facade.get_json_schema() == json_schema
|
192
205
|
self._abstract_stream.get_json_schema.assert_called_once_with()
|
193
206
|
|
194
|
-
def
|
195
|
-
assert
|
207
|
+
def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
|
208
|
+
assert (
|
209
|
+
StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
|
210
|
+
== self._legacy_stream.supports_incremental
|
211
|
+
)
|
212
|
+
|
213
|
+
def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
|
214
|
+
assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
|
196
215
|
|
197
216
|
def test_check_availability_is_delegated_to_wrapped_stream(self):
|
198
217
|
availability = StreamAvailable()
|
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
|
|
219
238
|
assert actual_stream_data == expected_stream_data
|
220
239
|
|
221
240
|
def test_read_records_incremental(self):
|
222
|
-
|
223
|
-
|
241
|
+
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
+
records = [Record(data) for data in expected_stream_data]
|
243
|
+
self._abstract_stream.read.return_value = records
|
244
|
+
|
245
|
+
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
246
|
+
|
247
|
+
assert actual_stream_data == expected_stream_data
|
224
248
|
|
225
249
|
def test_create_from_stream_stream(self):
|
226
250
|
stream = Mock()
|
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
228
252
|
stream.primary_key = "id"
|
229
253
|
stream.cursor_field = "cursor"
|
230
254
|
|
231
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
255
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
232
256
|
|
233
257
|
assert facade.name == "stream"
|
234
258
|
assert facade.cursor_field == "cursor"
|
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
240
264
|
stream.primary_key = None
|
241
265
|
stream.cursor_field = []
|
242
266
|
|
243
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
267
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
244
268
|
facade._abstract_stream._primary_key is None
|
245
269
|
|
246
270
|
def test_create_from_stream_with_composite_primary_key(self):
|
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
249
273
|
stream.primary_key = ["id", "name"]
|
250
274
|
stream.cursor_field = []
|
251
275
|
|
252
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
276
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
253
277
|
facade._abstract_stream._primary_key == ["id", "name"]
|
254
278
|
|
255
279
|
def test_create_from_stream_with_empty_list_cursor(self):
|
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
257
281
|
stream.primary_key = "id"
|
258
282
|
stream.cursor_field = []
|
259
283
|
|
260
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
284
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
261
285
|
|
262
286
|
assert facade.cursor_field == []
|
263
287
|
|
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
267
291
|
stream.primary_key = [["field", "id"]]
|
268
292
|
|
269
293
|
with self.assertRaises(ValueError):
|
270
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
294
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
271
295
|
|
272
296
|
def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
|
273
297
|
stream = Mock()
|
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
275
299
|
stream.primary_key = 123
|
276
300
|
|
277
301
|
with self.assertRaises(ValueError):
|
278
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
302
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
279
303
|
|
280
304
|
def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
|
281
305
|
stream = Mock()
|
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
284
308
|
stream.cursor_field = ["field", "cursor"]
|
285
309
|
|
286
310
|
with self.assertRaises(ValueError):
|
287
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
311
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
288
312
|
|
289
313
|
def test_create_from_stream_with_cursor_field_as_list(self):
|
290
314
|
stream = Mock()
|
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
292
316
|
stream.primary_key = "id"
|
293
317
|
stream.cursor_field = ["cursor"]
|
294
318
|
|
295
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
319
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
296
320
|
assert facade.cursor_field == "cursor"
|
297
321
|
|
298
322
|
def test_create_from_stream_none_message_repository(self):
|
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
|
|
302
326
|
self._source.message_repository = None
|
303
327
|
|
304
328
|
with self.assertRaises(ValueError):
|
305
|
-
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
329
|
+
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
|
306
330
|
|
307
331
|
def test_get_error_display_message_no_display_message(self):
|
308
332
|
self._stream.get_error_display_message.return_value = "display_message"
|
309
333
|
|
310
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
334
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
311
335
|
|
312
336
|
expected_display_message = None
|
313
337
|
e = Exception()
|
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
319
343
|
def test_get_error_display_message_with_display_message(self):
|
320
344
|
self._stream.get_error_display_message.return_value = "display_message"
|
321
345
|
|
322
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
346
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
323
347
|
|
324
348
|
expected_display_message = "display_message"
|
325
349
|
e = ExceptionWithDisplayMessage("display_message")
|
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
338
362
|
)
|
339
363
|
def test_get_error_display_message(exception, expected_display_message):
|
340
364
|
stream = Mock()
|
341
|
-
|
365
|
+
legacy_stream = Mock()
|
366
|
+
cursor = Mock(spec=Cursor)
|
367
|
+
facade = StreamFacade(stream, legacy_stream, cursor)
|
342
368
|
|
343
369
|
display_message = facade.get_error_display_message(exception)
|
344
370
|
|
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
|
|
21
21
|
|
22
22
|
stream = Mock()
|
23
23
|
message_repository = Mock()
|
24
|
-
partitions = [StreamPartition(stream, s, message_repository) for s in slices]
|
25
|
-
stream.generate.return_value = iter(partitions)
|
26
|
-
|
27
24
|
sync_mode = SyncMode.full_refresh
|
25
|
+
cursor_field = None
|
26
|
+
state = None
|
27
|
+
partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
|
28
|
+
stream.generate.return_value = iter(partitions)
|
28
29
|
|
29
|
-
partition_generator.generate_partitions(stream
|
30
|
+
partition_generator.generate_partitions(stream)
|
30
31
|
|
31
32
|
actual_partitions = []
|
32
33
|
while partition := queue.get(False):
|
@@ -0,0 +1,130 @@
|
|
1
|
+
from typing import Any, Mapping, Optional
|
2
|
+
from unittest import TestCase
|
3
|
+
from unittest.mock import Mock
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
7
|
+
from airbyte_cdk.sources.message import MessageRepository
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Comparable, ConcurrentCursor, CursorField
|
9
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
11
|
+
|
12
|
+
_A_STREAM_NAME = "a stream name"
|
13
|
+
_A_STREAM_NAMESPACE = "a stream namespace"
|
14
|
+
_ANY_STATE = None
|
15
|
+
_A_CURSOR_FIELD_KEY = "a_cursor_field_key"
|
16
|
+
_NO_PARTITION_IDENTIFIER = None
|
17
|
+
_NO_SLICE = None
|
18
|
+
_NO_SLICE_BOUNDARIES = None
|
19
|
+
_LOWER_SLICE_BOUNDARY_FIELD = "lower_boundary"
|
20
|
+
_UPPER_SLICE_BOUNDARY_FIELD = "upper_boundary"
|
21
|
+
_SLICE_BOUNDARY_FIELDS = (_LOWER_SLICE_BOUNDARY_FIELD, _UPPER_SLICE_BOUNDARY_FIELD)
|
22
|
+
_A_VERY_HIGH_CURSOR_VALUE = 1000000000
|
23
|
+
|
24
|
+
|
25
|
+
def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
|
26
|
+
partition = Mock(spec=Partition)
|
27
|
+
partition.to_slice.return_value = _slice
|
28
|
+
return partition
|
29
|
+
|
30
|
+
|
31
|
+
def _record(cursor_value: Comparable) -> Record:
|
32
|
+
return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
|
33
|
+
|
34
|
+
|
35
|
+
class ConcurrentCursorTest(TestCase):
|
36
|
+
def setUp(self) -> None:
|
37
|
+
self._message_repository = Mock(spec=MessageRepository)
|
38
|
+
self._state_manager = Mock(spec=ConnectorStateManager)
|
39
|
+
|
40
|
+
def _cursor_with_slice_boundary_fields(self) -> ConcurrentCursor:
|
41
|
+
return ConcurrentCursor(
|
42
|
+
_A_STREAM_NAME,
|
43
|
+
_A_STREAM_NAMESPACE,
|
44
|
+
_ANY_STATE,
|
45
|
+
self._message_repository,
|
46
|
+
self._state_manager,
|
47
|
+
CursorField(_A_CURSOR_FIELD_KEY),
|
48
|
+
_SLICE_BOUNDARY_FIELDS,
|
49
|
+
)
|
50
|
+
|
51
|
+
def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
|
52
|
+
return ConcurrentCursor(
|
53
|
+
_A_STREAM_NAME,
|
54
|
+
_A_STREAM_NAMESPACE,
|
55
|
+
_ANY_STATE,
|
56
|
+
self._message_repository,
|
57
|
+
self._state_manager,
|
58
|
+
CursorField(_A_CURSOR_FIELD_KEY),
|
59
|
+
None,
|
60
|
+
)
|
61
|
+
|
62
|
+
def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
|
63
|
+
self._cursor_with_slice_boundary_fields().close_partition(
|
64
|
+
_partition(
|
65
|
+
{_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
|
66
|
+
)
|
67
|
+
)
|
68
|
+
|
69
|
+
self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
|
70
|
+
self._state_manager.update_state_for_stream.assert_called_once_with(
|
71
|
+
_A_STREAM_NAME,
|
72
|
+
_A_STREAM_NAMESPACE,
|
73
|
+
{
|
74
|
+
"slices": [
|
75
|
+
{
|
76
|
+
"start": 12,
|
77
|
+
"end": 30,
|
78
|
+
},
|
79
|
+
]
|
80
|
+
},
|
81
|
+
)
|
82
|
+
|
83
|
+
def test_given_boundary_fields_and_record_observed_when_close_partition_then_ignore_records(self) -> None:
|
84
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
85
|
+
cursor.observe(_record(_A_VERY_HIGH_CURSOR_VALUE))
|
86
|
+
|
87
|
+
cursor.close_partition(_partition({_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30}))
|
88
|
+
|
89
|
+
assert self._state_manager.update_state_for_stream.call_args_list[0].args[2]["slices"][0]["end"] != _A_VERY_HIGH_CURSOR_VALUE
|
90
|
+
|
91
|
+
def test_given_no_boundary_fields_when_close_partition_then_emit_state(self) -> None:
|
92
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
93
|
+
cursor.observe(_record(10))
|
94
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
95
|
+
|
96
|
+
self._state_manager.update_state_for_stream.assert_called_once_with(
|
97
|
+
_A_STREAM_NAME,
|
98
|
+
_A_STREAM_NAMESPACE,
|
99
|
+
{
|
100
|
+
"slices": [
|
101
|
+
{
|
102
|
+
"start": 0,
|
103
|
+
"end": 10,
|
104
|
+
},
|
105
|
+
]
|
106
|
+
},
|
107
|
+
)
|
108
|
+
|
109
|
+
def test_given_no_boundary_fields_when_close_multiple_partitions_then_raise_exception(self) -> None:
|
110
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
111
|
+
cursor.observe(_record(10))
|
112
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
113
|
+
|
114
|
+
with pytest.raises(ValueError):
|
115
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
116
|
+
|
117
|
+
def test_given_no_records_observed_when_close_partition_then_do_not_emit_state(self) -> None:
|
118
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
119
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
120
|
+
assert self._message_repository.emit_message.call_count == 0
|
121
|
+
|
122
|
+
def test_given_slice_boundaries_and_no_slice_when_close_partition_then_raise_error(self) -> None:
|
123
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
124
|
+
with pytest.raises(KeyError):
|
125
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
126
|
+
|
127
|
+
def test_given_slice_boundaries_not_matching_slice_when_close_partition_then_raise_error(self) -> None:
|
128
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
129
|
+
with pytest.raises(KeyError):
|
130
|
+
cursor.close_partition(_partition({"not_matching_key": "value"}))
|
@@ -5,9 +5,9 @@
|
|
5
5
|
import unittest
|
6
6
|
from unittest.mock import Mock, call
|
7
7
|
|
8
|
-
import pytest
|
9
8
|
from airbyte_cdk.models import AirbyteStream, SyncMode
|
10
9
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
11
11
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
12
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
@@ -25,6 +25,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
25
25
|
self._slice_logger = Mock()
|
26
26
|
self._logger = Mock()
|
27
27
|
self._message_repository = Mock()
|
28
|
+
self._cursor = Mock(spec=Cursor)
|
28
29
|
self._stream = ThreadBasedConcurrentStream(
|
29
30
|
self._partition_generator,
|
30
31
|
self._max_workers,
|
@@ -39,6 +40,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
39
40
|
1,
|
40
41
|
2,
|
41
42
|
0,
|
43
|
+
cursor=self._cursor,
|
42
44
|
)
|
43
45
|
|
44
46
|
def test_get_json_schema(self):
|
@@ -76,17 +78,20 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
76
78
|
with self.assertRaises(Exception):
|
77
79
|
self._stream._check_for_errors(futures)
|
78
80
|
|
79
|
-
def
|
81
|
+
def test_read_observe_records_and_close_partition(self):
|
80
82
|
partition = Mock(spec=Partition)
|
81
|
-
|
83
|
+
expected_records = [Record({"id": 1}), Record({"id": "2"})]
|
84
|
+
partition.read.return_value = expected_records
|
85
|
+
partition.to_slice.return_value = {"slice": "slice"}
|
86
|
+
self._slice_logger.should_log_slice_message.return_value = False
|
87
|
+
|
82
88
|
self._partition_generator.generate.return_value = [partition]
|
83
|
-
|
84
|
-
|
89
|
+
actual_records = list(self._stream.read())
|
90
|
+
|
91
|
+
assert expected_records == actual_records
|
85
92
|
|
86
|
-
|
87
|
-
self.
|
88
|
-
with pytest.raises(RuntimeError):
|
89
|
-
list(self._stream.read())
|
93
|
+
self._cursor.observe.has_calls([call(record) for record in expected_records])
|
94
|
+
self._cursor.close_partition.assert_called_once_with(partition)
|
90
95
|
|
91
96
|
def test_read_no_slice_message(self):
|
92
97
|
partition = Mock(spec=Partition)
|
@@ -218,7 +223,6 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
218
223
|
assert expected_airbyte_stream == airbyte_stream
|
219
224
|
|
220
225
|
def test_as_airbyte_stream_with_a_cursor(self):
|
221
|
-
|
222
226
|
json_schema = {
|
223
227
|
"type": "object",
|
224
228
|
"properties": {
|
@@ -12,6 +12,7 @@ from airbyte_cdk.models import Type as MessageType
|
|
12
12
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
13
13
|
from airbyte_cdk.sources.streams import Stream
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
15
16
|
from airbyte_cdk.sources.streams.core import StreamData
|
16
17
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
17
18
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
|
@@ -19,6 +20,7 @@ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
|
|
19
20
|
_A_CURSOR_FIELD = ["NESTED", "CURSOR"]
|
20
21
|
_DEFAULT_INTERNAL_CONFIG = InternalConfig()
|
21
22
|
_STREAM_NAME = "STREAM"
|
23
|
+
_NO_STATE = None
|
22
24
|
|
23
25
|
|
24
26
|
class _MockStream(Stream):
|
@@ -57,7 +59,7 @@ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message
|
|
57
59
|
source = Mock()
|
58
60
|
source._slice_logger = slice_logger
|
59
61
|
source.message_repository = message_repository
|
60
|
-
stream = StreamFacade.create_from_stream(stream, source, logger, 1)
|
62
|
+
stream = StreamFacade.create_from_stream(stream, source, logger, 1, _NO_STATE, NoopCursor())
|
61
63
|
stream.logger.setLevel(logger.level)
|
62
64
|
return stream
|
63
65
|
|
@@ -996,10 +996,11 @@ class TestIncrementalRead:
|
|
996
996
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
997
997
|
# stream 1 slice 2
|
998
998
|
_as_record("s1", stream_output[0]),
|
999
|
-
_as_record("s1", stream_output[1]),
|
1000
999
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1000
|
+
_as_record("s1", stream_output[1]),
|
1001
1001
|
_as_record("s1", stream_output[2]),
|
1002
1002
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1003
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1003
1004
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1004
1005
|
# stream 2 slice 1
|
1005
1006
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1011,17 +1012,18 @@ class TestIncrementalRead:
|
|
1011
1012
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1012
1013
|
# stream 2 slice 2
|
1013
1014
|
_as_record("s2", stream_output[0]),
|
1014
|
-
_as_record("s2", stream_output[1]),
|
1015
1015
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1016
|
+
_as_record("s2", stream_output[1]),
|
1016
1017
|
_as_record("s2", stream_output[2]),
|
1017
1018
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1019
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1018
1020
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1019
1021
|
]
|
1020
1022
|
)
|
1021
1023
|
|
1022
1024
|
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
|
1023
1025
|
|
1024
|
-
assert
|
1026
|
+
assert messages == expected
|
1025
1027
|
|
1026
1028
|
@pytest.mark.parametrize(
|
1027
1029
|
"per_stream_enabled",
|
@@ -1108,11 +1110,12 @@ class TestIncrementalRead:
|
|
1108
1110
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1109
1111
|
# stream 1 slice 2
|
1110
1112
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1113
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1111
1114
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1112
1115
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1113
|
-
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1114
1116
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1115
1117
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1118
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1116
1119
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1117
1120
|
# stream 2 slice 1
|
1118
1121
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1125,33 +1128,33 @@ class TestIncrementalRead:
|
|
1125
1128
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1126
1129
|
# stream 2 slice 2
|
1127
1130
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1131
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1128
1132
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1129
1133
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1130
|
-
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1131
1134
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1132
1135
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1136
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1133
1137
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1134
1138
|
]
|
1135
1139
|
)
|
1136
1140
|
|
1137
1141
|
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
|
1138
1142
|
|
1139
|
-
assert
|
1143
|
+
assert messages == expected
|
1140
1144
|
|
1141
1145
|
|
1142
1146
|
def test_checkpoint_state_from_stream_instance():
|
1143
1147
|
teams_stream = MockStreamOverridesStateMethod()
|
1144
1148
|
managers_stream = StreamNoStateMethod()
|
1145
|
-
src = MockSource(streams=[teams_stream, managers_stream])
|
1146
1149
|
state_manager = ConnectorStateManager({"teams": teams_stream, "managers": managers_stream}, [])
|
1147
1150
|
|
1148
1151
|
# The stream_state passed to checkpoint_state() should be ignored since stream implements state function
|
1149
1152
|
teams_stream.state = {"updated_at": "2022-09-11"}
|
1150
|
-
actual_message =
|
1153
|
+
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
|
1151
1154
|
assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
|
1152
1155
|
|
1153
1156
|
# The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
|
1154
|
-
actual_message =
|
1157
|
+
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
|
1155
1158
|
assert actual_message == _as_state(
|
1156
1159
|
{"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
|
1157
1160
|
)
|
File without changes
|
File without changes
|
File without changes
|