airbyte-cdk 0.52.7__py3-none-any.whl → 0.52.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/destinations/vector_db_based/config.py +1 -0
- airbyte_cdk/sources/abstract_source.py +12 -61
- airbyte_cdk/sources/message/repository.py +0 -6
- airbyte_cdk/sources/source.py +14 -13
- airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
- airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
- airbyte_cdk/sources/streams/core.py +71 -1
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
- unit_tests/sources/message/test_repository.py +7 -20
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
- unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
- unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
- unit_tests/sources/streams/test_stream_read.py +3 -1
- unit_tests/sources/test_abstract_source.py +12 -9
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
|
|
11
11
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
|
12
12
|
from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
|
13
13
|
from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
|
14
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
15
|
+
test_incremental_stream_with_slice_boundaries,
|
16
|
+
test_incremental_stream_without_slice_boundaries,
|
14
17
|
test_stream_facade_multiple_streams,
|
15
18
|
test_stream_facade_raises_exception,
|
16
19
|
test_stream_facade_single_stream,
|
@@ -43,6 +46,9 @@ scenarios = [
|
|
43
46
|
test_stream_facade_single_stream_with_multiple_slices,
|
44
47
|
test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
|
45
48
|
test_stream_facade_raises_exception,
|
49
|
+
test_incremental_stream_with_slice_boundaries,
|
50
|
+
test_incremental_stream_without_slice_boundaries,
|
51
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
46
52
|
]
|
47
53
|
|
48
54
|
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
|
|
12
12
|
from airbyte_cdk.sources.streams import Stream
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
15
16
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
16
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
17
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
18
19
|
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
20
|
+
from airbyte_cdk.sources.streams.core import StreamData
|
19
21
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
20
22
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
21
23
|
from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
|
22
24
|
|
23
25
|
|
26
|
+
class LegacyStream(Stream):
|
27
|
+
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
28
|
+
return None
|
29
|
+
|
30
|
+
def read_records(
|
31
|
+
self,
|
32
|
+
sync_mode: SyncMode,
|
33
|
+
cursor_field: Optional[List[str]] = None,
|
34
|
+
stream_slice: Optional[Mapping[str, Any]] = None,
|
35
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
36
|
+
) -> Iterable[StreamData]:
|
37
|
+
yield from []
|
38
|
+
|
39
|
+
|
24
40
|
class ConcurrentCdkSource(AbstractSource):
|
25
41
|
def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
|
26
42
|
self._streams = streams
|
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
31
47
|
return True, None
|
32
48
|
|
33
49
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
34
|
-
return [StreamFacade(s) for s in self._streams]
|
50
|
+
return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
|
35
51
|
|
36
52
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
37
53
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
40
56
|
return ConfiguredAirbyteCatalog(
|
41
57
|
streams=[
|
42
58
|
ConfiguredAirbyteStream(
|
43
|
-
stream=StreamFacade(s).as_airbyte_stream(),
|
59
|
+
stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
|
44
60
|
sync_mode=SyncMode.full_refresh,
|
45
61
|
destination_sync_mode=DestinationSyncMode.overwrite,
|
46
62
|
)
|
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
|
|
57
73
|
def __init__(self, partitions: List[Partition]):
|
58
74
|
self._partitions = partitions
|
59
75
|
|
60
|
-
def generate(self
|
76
|
+
def generate(self) -> Iterable[Partition]:
|
61
77
|
yield from self._partitions
|
62
78
|
|
63
79
|
|
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
|
|
17
17
|
StreamPartitionGenerator,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
|
20
21
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
21
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
|
+
from airbyte_cdk.sources.streams.core import Stream
|
22
24
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
23
25
|
|
26
|
+
_ANY_SYNC_MODE = SyncMode.full_refresh
|
27
|
+
_ANY_STATE = {"state_key": "state_value"}
|
28
|
+
_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
|
29
|
+
|
24
30
|
|
25
31
|
@pytest.mark.parametrize(
|
26
32
|
"stream_availability, expected_available, expected_message",
|
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
|
|
71
77
|
stream_slices = [{"slice": 1}, {"slice": 2}]
|
72
78
|
stream.stream_slices.return_value = stream_slices
|
73
79
|
|
74
|
-
partition_generator = StreamPartitionGenerator(stream, message_repository)
|
80
|
+
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
75
81
|
|
76
|
-
partitions = list(partition_generator.generate(
|
82
|
+
partitions = list(partition_generator.generate())
|
77
83
|
slices = [partition.to_slice() for partition in partitions]
|
78
84
|
assert slices == stream_slices
|
85
|
+
stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
|
79
86
|
|
80
87
|
|
81
88
|
@pytest.mark.parametrize(
|
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
|
|
95
102
|
stream.transformer = transformer
|
96
103
|
message_repository = InMemoryMessageRepository()
|
97
104
|
_slice = None
|
98
|
-
|
105
|
+
sync_mode = SyncMode.full_refresh
|
106
|
+
cursor_field = None
|
107
|
+
state = None
|
108
|
+
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
|
99
109
|
|
100
110
|
a_log_message = AirbyteMessage(
|
101
111
|
type=MessageType.LOG,
|
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
128
138
|
|
129
139
|
message_repository = InMemoryMessageRepository()
|
130
140
|
_slice = None
|
131
|
-
|
141
|
+
|
142
|
+
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
132
143
|
|
133
144
|
stream.read_records.side_effect = Exception()
|
134
145
|
|
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
148
159
|
def test_stream_partition_hash(_slice, expected_hash):
|
149
160
|
stream = Mock()
|
150
161
|
stream.name = "stream"
|
151
|
-
partition = StreamPartition(stream, _slice, Mock())
|
162
|
+
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
152
163
|
|
153
164
|
_hash = partition.__hash__()
|
154
165
|
assert _hash == expected_hash
|
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
163
174
|
json_schema={"type": "object"},
|
164
175
|
supported_sync_modes=[SyncMode.full_refresh],
|
165
176
|
)
|
166
|
-
self.
|
177
|
+
self._legacy_stream = Mock(spec=Stream)
|
178
|
+
self._cursor = Mock(spec=Cursor)
|
179
|
+
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
|
167
180
|
self._logger = Mock()
|
168
181
|
self._source = Mock()
|
169
182
|
self._max_workers = 10
|
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
|
|
191
204
|
assert self._facade.get_json_schema() == json_schema
|
192
205
|
self._abstract_stream.get_json_schema.assert_called_once_with()
|
193
206
|
|
194
|
-
def
|
195
|
-
assert
|
207
|
+
def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
|
208
|
+
assert (
|
209
|
+
StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
|
210
|
+
== self._legacy_stream.supports_incremental
|
211
|
+
)
|
212
|
+
|
213
|
+
def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
|
214
|
+
assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
|
196
215
|
|
197
216
|
def test_check_availability_is_delegated_to_wrapped_stream(self):
|
198
217
|
availability = StreamAvailable()
|
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
|
|
219
238
|
assert actual_stream_data == expected_stream_data
|
220
239
|
|
221
240
|
def test_read_records_incremental(self):
|
222
|
-
|
223
|
-
|
241
|
+
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
+
records = [Record(data) for data in expected_stream_data]
|
243
|
+
self._abstract_stream.read.return_value = records
|
244
|
+
|
245
|
+
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
246
|
+
|
247
|
+
assert actual_stream_data == expected_stream_data
|
224
248
|
|
225
249
|
def test_create_from_stream_stream(self):
|
226
250
|
stream = Mock()
|
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
228
252
|
stream.primary_key = "id"
|
229
253
|
stream.cursor_field = "cursor"
|
230
254
|
|
231
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
255
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
232
256
|
|
233
257
|
assert facade.name == "stream"
|
234
258
|
assert facade.cursor_field == "cursor"
|
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
240
264
|
stream.primary_key = None
|
241
265
|
stream.cursor_field = []
|
242
266
|
|
243
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
267
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
244
268
|
facade._abstract_stream._primary_key is None
|
245
269
|
|
246
270
|
def test_create_from_stream_with_composite_primary_key(self):
|
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
249
273
|
stream.primary_key = ["id", "name"]
|
250
274
|
stream.cursor_field = []
|
251
275
|
|
252
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
276
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
253
277
|
facade._abstract_stream._primary_key == ["id", "name"]
|
254
278
|
|
255
279
|
def test_create_from_stream_with_empty_list_cursor(self):
|
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
257
281
|
stream.primary_key = "id"
|
258
282
|
stream.cursor_field = []
|
259
283
|
|
260
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
284
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
261
285
|
|
262
286
|
assert facade.cursor_field == []
|
263
287
|
|
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
267
291
|
stream.primary_key = [["field", "id"]]
|
268
292
|
|
269
293
|
with self.assertRaises(ValueError):
|
270
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
294
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
271
295
|
|
272
296
|
def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
|
273
297
|
stream = Mock()
|
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
275
299
|
stream.primary_key = 123
|
276
300
|
|
277
301
|
with self.assertRaises(ValueError):
|
278
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
302
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
279
303
|
|
280
304
|
def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
|
281
305
|
stream = Mock()
|
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
284
308
|
stream.cursor_field = ["field", "cursor"]
|
285
309
|
|
286
310
|
with self.assertRaises(ValueError):
|
287
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
311
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
288
312
|
|
289
313
|
def test_create_from_stream_with_cursor_field_as_list(self):
|
290
314
|
stream = Mock()
|
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
292
316
|
stream.primary_key = "id"
|
293
317
|
stream.cursor_field = ["cursor"]
|
294
318
|
|
295
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
319
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
296
320
|
assert facade.cursor_field == "cursor"
|
297
321
|
|
298
322
|
def test_create_from_stream_none_message_repository(self):
|
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
|
|
302
326
|
self._source.message_repository = None
|
303
327
|
|
304
328
|
with self.assertRaises(ValueError):
|
305
|
-
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
329
|
+
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
|
306
330
|
|
307
331
|
def test_get_error_display_message_no_display_message(self):
|
308
332
|
self._stream.get_error_display_message.return_value = "display_message"
|
309
333
|
|
310
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
334
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
311
335
|
|
312
336
|
expected_display_message = None
|
313
337
|
e = Exception()
|
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
319
343
|
def test_get_error_display_message_with_display_message(self):
|
320
344
|
self._stream.get_error_display_message.return_value = "display_message"
|
321
345
|
|
322
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
346
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
323
347
|
|
324
348
|
expected_display_message = "display_message"
|
325
349
|
e = ExceptionWithDisplayMessage("display_message")
|
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
338
362
|
)
|
339
363
|
def test_get_error_display_message(exception, expected_display_message):
|
340
364
|
stream = Mock()
|
341
|
-
|
365
|
+
legacy_stream = Mock()
|
366
|
+
cursor = Mock(spec=Cursor)
|
367
|
+
facade = StreamFacade(stream, legacy_stream, cursor)
|
342
368
|
|
343
369
|
display_message = facade.get_error_display_message(exception)
|
344
370
|
|
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
|
|
21
21
|
|
22
22
|
stream = Mock()
|
23
23
|
message_repository = Mock()
|
24
|
-
partitions = [StreamPartition(stream, s, message_repository) for s in slices]
|
25
|
-
stream.generate.return_value = iter(partitions)
|
26
|
-
|
27
24
|
sync_mode = SyncMode.full_refresh
|
25
|
+
cursor_field = None
|
26
|
+
state = None
|
27
|
+
partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
|
28
|
+
stream.generate.return_value = iter(partitions)
|
28
29
|
|
29
|
-
partition_generator.generate_partitions(stream
|
30
|
+
partition_generator.generate_partitions(stream)
|
30
31
|
|
31
32
|
actual_partitions = []
|
32
33
|
while partition := queue.get(False):
|
@@ -0,0 +1,130 @@
|
|
1
|
+
from typing import Any, Mapping, Optional
|
2
|
+
from unittest import TestCase
|
3
|
+
from unittest.mock import Mock
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
7
|
+
from airbyte_cdk.sources.message import MessageRepository
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Comparable, ConcurrentCursor, CursorField
|
9
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
11
|
+
|
12
|
+
_A_STREAM_NAME = "a stream name"
|
13
|
+
_A_STREAM_NAMESPACE = "a stream namespace"
|
14
|
+
_ANY_STATE = None
|
15
|
+
_A_CURSOR_FIELD_KEY = "a_cursor_field_key"
|
16
|
+
_NO_PARTITION_IDENTIFIER = None
|
17
|
+
_NO_SLICE = None
|
18
|
+
_NO_SLICE_BOUNDARIES = None
|
19
|
+
_LOWER_SLICE_BOUNDARY_FIELD = "lower_boundary"
|
20
|
+
_UPPER_SLICE_BOUNDARY_FIELD = "upper_boundary"
|
21
|
+
_SLICE_BOUNDARY_FIELDS = (_LOWER_SLICE_BOUNDARY_FIELD, _UPPER_SLICE_BOUNDARY_FIELD)
|
22
|
+
_A_VERY_HIGH_CURSOR_VALUE = 1000000000
|
23
|
+
|
24
|
+
|
25
|
+
def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
|
26
|
+
partition = Mock(spec=Partition)
|
27
|
+
partition.to_slice.return_value = _slice
|
28
|
+
return partition
|
29
|
+
|
30
|
+
|
31
|
+
def _record(cursor_value: Comparable) -> Record:
|
32
|
+
return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
|
33
|
+
|
34
|
+
|
35
|
+
class ConcurrentCursorTest(TestCase):
|
36
|
+
def setUp(self) -> None:
|
37
|
+
self._message_repository = Mock(spec=MessageRepository)
|
38
|
+
self._state_manager = Mock(spec=ConnectorStateManager)
|
39
|
+
|
40
|
+
def _cursor_with_slice_boundary_fields(self) -> ConcurrentCursor:
|
41
|
+
return ConcurrentCursor(
|
42
|
+
_A_STREAM_NAME,
|
43
|
+
_A_STREAM_NAMESPACE,
|
44
|
+
_ANY_STATE,
|
45
|
+
self._message_repository,
|
46
|
+
self._state_manager,
|
47
|
+
CursorField(_A_CURSOR_FIELD_KEY),
|
48
|
+
_SLICE_BOUNDARY_FIELDS,
|
49
|
+
)
|
50
|
+
|
51
|
+
def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
|
52
|
+
return ConcurrentCursor(
|
53
|
+
_A_STREAM_NAME,
|
54
|
+
_A_STREAM_NAMESPACE,
|
55
|
+
_ANY_STATE,
|
56
|
+
self._message_repository,
|
57
|
+
self._state_manager,
|
58
|
+
CursorField(_A_CURSOR_FIELD_KEY),
|
59
|
+
None,
|
60
|
+
)
|
61
|
+
|
62
|
+
def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
|
63
|
+
self._cursor_with_slice_boundary_fields().close_partition(
|
64
|
+
_partition(
|
65
|
+
{_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
|
66
|
+
)
|
67
|
+
)
|
68
|
+
|
69
|
+
self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
|
70
|
+
self._state_manager.update_state_for_stream.assert_called_once_with(
|
71
|
+
_A_STREAM_NAME,
|
72
|
+
_A_STREAM_NAMESPACE,
|
73
|
+
{
|
74
|
+
"slices": [
|
75
|
+
{
|
76
|
+
"start": 12,
|
77
|
+
"end": 30,
|
78
|
+
},
|
79
|
+
]
|
80
|
+
},
|
81
|
+
)
|
82
|
+
|
83
|
+
def test_given_boundary_fields_and_record_observed_when_close_partition_then_ignore_records(self) -> None:
|
84
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
85
|
+
cursor.observe(_record(_A_VERY_HIGH_CURSOR_VALUE))
|
86
|
+
|
87
|
+
cursor.close_partition(_partition({_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30}))
|
88
|
+
|
89
|
+
assert self._state_manager.update_state_for_stream.call_args_list[0].args[2]["slices"][0]["end"] != _A_VERY_HIGH_CURSOR_VALUE
|
90
|
+
|
91
|
+
def test_given_no_boundary_fields_when_close_partition_then_emit_state(self) -> None:
|
92
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
93
|
+
cursor.observe(_record(10))
|
94
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
95
|
+
|
96
|
+
self._state_manager.update_state_for_stream.assert_called_once_with(
|
97
|
+
_A_STREAM_NAME,
|
98
|
+
_A_STREAM_NAMESPACE,
|
99
|
+
{
|
100
|
+
"slices": [
|
101
|
+
{
|
102
|
+
"start": 0,
|
103
|
+
"end": 10,
|
104
|
+
},
|
105
|
+
]
|
106
|
+
},
|
107
|
+
)
|
108
|
+
|
109
|
+
def test_given_no_boundary_fields_when_close_multiple_partitions_then_raise_exception(self) -> None:
|
110
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
111
|
+
cursor.observe(_record(10))
|
112
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
113
|
+
|
114
|
+
with pytest.raises(ValueError):
|
115
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
116
|
+
|
117
|
+
def test_given_no_records_observed_when_close_partition_then_do_not_emit_state(self) -> None:
|
118
|
+
cursor = self._cursor_without_slice_boundary_fields()
|
119
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
120
|
+
assert self._message_repository.emit_message.call_count == 0
|
121
|
+
|
122
|
+
def test_given_slice_boundaries_and_no_slice_when_close_partition_then_raise_error(self) -> None:
|
123
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
124
|
+
with pytest.raises(KeyError):
|
125
|
+
cursor.close_partition(_partition(_NO_SLICE))
|
126
|
+
|
127
|
+
def test_given_slice_boundaries_not_matching_slice_when_close_partition_then_raise_error(self) -> None:
|
128
|
+
cursor = self._cursor_with_slice_boundary_fields()
|
129
|
+
with pytest.raises(KeyError):
|
130
|
+
cursor.close_partition(_partition({"not_matching_key": "value"}))
|
@@ -5,9 +5,9 @@
|
|
5
5
|
import unittest
|
6
6
|
from unittest.mock import Mock, call
|
7
7
|
|
8
|
-
import pytest
|
9
8
|
from airbyte_cdk.models import AirbyteStream, SyncMode
|
10
9
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
|
10
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
11
11
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
12
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
@@ -25,6 +25,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
25
25
|
self._slice_logger = Mock()
|
26
26
|
self._logger = Mock()
|
27
27
|
self._message_repository = Mock()
|
28
|
+
self._cursor = Mock(spec=Cursor)
|
28
29
|
self._stream = ThreadBasedConcurrentStream(
|
29
30
|
self._partition_generator,
|
30
31
|
self._max_workers,
|
@@ -39,6 +40,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
39
40
|
1,
|
40
41
|
2,
|
41
42
|
0,
|
43
|
+
cursor=self._cursor,
|
42
44
|
)
|
43
45
|
|
44
46
|
def test_get_json_schema(self):
|
@@ -76,17 +78,20 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
76
78
|
with self.assertRaises(Exception):
|
77
79
|
self._stream._check_for_errors(futures)
|
78
80
|
|
79
|
-
def
|
81
|
+
def test_read_observe_records_and_close_partition(self):
|
80
82
|
partition = Mock(spec=Partition)
|
81
|
-
|
83
|
+
expected_records = [Record({"id": 1}), Record({"id": "2"})]
|
84
|
+
partition.read.return_value = expected_records
|
85
|
+
partition.to_slice.return_value = {"slice": "slice"}
|
86
|
+
self._slice_logger.should_log_slice_message.return_value = False
|
87
|
+
|
82
88
|
self._partition_generator.generate.return_value = [partition]
|
83
|
-
|
84
|
-
|
89
|
+
actual_records = list(self._stream.read())
|
90
|
+
|
91
|
+
assert expected_records == actual_records
|
85
92
|
|
86
|
-
|
87
|
-
self.
|
88
|
-
with pytest.raises(RuntimeError):
|
89
|
-
list(self._stream.read())
|
93
|
+
self._cursor.observe.has_calls([call(record) for record in expected_records])
|
94
|
+
self._cursor.close_partition.assert_called_once_with(partition)
|
90
95
|
|
91
96
|
def test_read_no_slice_message(self):
|
92
97
|
partition = Mock(spec=Partition)
|
@@ -218,7 +223,6 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
|
|
218
223
|
assert expected_airbyte_stream == airbyte_stream
|
219
224
|
|
220
225
|
def test_as_airbyte_stream_with_a_cursor(self):
|
221
|
-
|
222
226
|
json_schema = {
|
223
227
|
"type": "object",
|
224
228
|
"properties": {
|
@@ -12,6 +12,7 @@ from airbyte_cdk.models import Type as MessageType
|
|
12
12
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
13
13
|
from airbyte_cdk.sources.streams import Stream
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
15
16
|
from airbyte_cdk.sources.streams.core import StreamData
|
16
17
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
17
18
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
|
@@ -19,6 +20,7 @@ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
|
|
19
20
|
_A_CURSOR_FIELD = ["NESTED", "CURSOR"]
|
20
21
|
_DEFAULT_INTERNAL_CONFIG = InternalConfig()
|
21
22
|
_STREAM_NAME = "STREAM"
|
23
|
+
_NO_STATE = None
|
22
24
|
|
23
25
|
|
24
26
|
class _MockStream(Stream):
|
@@ -57,7 +59,7 @@ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message
|
|
57
59
|
source = Mock()
|
58
60
|
source._slice_logger = slice_logger
|
59
61
|
source.message_repository = message_repository
|
60
|
-
stream = StreamFacade.create_from_stream(stream, source, logger, 1)
|
62
|
+
stream = StreamFacade.create_from_stream(stream, source, logger, 1, _NO_STATE, NoopCursor())
|
61
63
|
stream.logger.setLevel(logger.level)
|
62
64
|
return stream
|
63
65
|
|
@@ -996,10 +996,11 @@ class TestIncrementalRead:
|
|
996
996
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
997
997
|
# stream 1 slice 2
|
998
998
|
_as_record("s1", stream_output[0]),
|
999
|
-
_as_record("s1", stream_output[1]),
|
1000
999
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1000
|
+
_as_record("s1", stream_output[1]),
|
1001
1001
|
_as_record("s1", stream_output[2]),
|
1002
1002
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1003
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1003
1004
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1004
1005
|
# stream 2 slice 1
|
1005
1006
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1011,17 +1012,18 @@ class TestIncrementalRead:
|
|
1011
1012
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1012
1013
|
# stream 2 slice 2
|
1013
1014
|
_as_record("s2", stream_output[0]),
|
1014
|
-
_as_record("s2", stream_output[1]),
|
1015
1015
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1016
|
+
_as_record("s2", stream_output[1]),
|
1016
1017
|
_as_record("s2", stream_output[2]),
|
1017
1018
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1019
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1018
1020
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1019
1021
|
]
|
1020
1022
|
)
|
1021
1023
|
|
1022
1024
|
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
|
1023
1025
|
|
1024
|
-
assert
|
1026
|
+
assert messages == expected
|
1025
1027
|
|
1026
1028
|
@pytest.mark.parametrize(
|
1027
1029
|
"per_stream_enabled",
|
@@ -1108,11 +1110,12 @@ class TestIncrementalRead:
|
|
1108
1110
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1109
1111
|
# stream 1 slice 2
|
1110
1112
|
stream_data_to_airbyte_message("s1", stream_output[0]),
|
1113
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1111
1114
|
stream_data_to_airbyte_message("s1", stream_output[1]),
|
1112
1115
|
stream_data_to_airbyte_message("s1", stream_output[2]),
|
1113
|
-
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1114
1116
|
stream_data_to_airbyte_message("s1", stream_output[3]),
|
1115
1117
|
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1118
|
+
_as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
|
1116
1119
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1117
1120
|
# stream 2 slice 1
|
1118
1121
|
_as_stream_status("s2", AirbyteStreamStatus.STARTED),
|
@@ -1125,33 +1128,33 @@ class TestIncrementalRead:
|
|
1125
1128
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1126
1129
|
# stream 2 slice 2
|
1127
1130
|
stream_data_to_airbyte_message("s2", stream_output[0]),
|
1131
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1128
1132
|
stream_data_to_airbyte_message("s2", stream_output[1]),
|
1129
1133
|
stream_data_to_airbyte_message("s2", stream_output[2]),
|
1130
|
-
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1131
1134
|
stream_data_to_airbyte_message("s2", stream_output[3]),
|
1132
1135
|
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1136
|
+
_as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
|
1133
1137
|
_as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
|
1134
1138
|
]
|
1135
1139
|
)
|
1136
1140
|
|
1137
1141
|
messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
|
1138
1142
|
|
1139
|
-
assert
|
1143
|
+
assert messages == expected
|
1140
1144
|
|
1141
1145
|
|
1142
1146
|
def test_checkpoint_state_from_stream_instance():
|
1143
1147
|
teams_stream = MockStreamOverridesStateMethod()
|
1144
1148
|
managers_stream = StreamNoStateMethod()
|
1145
|
-
src = MockSource(streams=[teams_stream, managers_stream])
|
1146
1149
|
state_manager = ConnectorStateManager({"teams": teams_stream, "managers": managers_stream}, [])
|
1147
1150
|
|
1148
1151
|
# The stream_state passed to checkpoint_state() should be ignored since stream implements state function
|
1149
1152
|
teams_stream.state = {"updated_at": "2022-09-11"}
|
1150
|
-
actual_message =
|
1153
|
+
actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
|
1151
1154
|
assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
|
1152
1155
|
|
1153
1156
|
# The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
|
1154
|
-
actual_message =
|
1157
|
+
actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
|
1155
1158
|
assert actual_message == _as_state(
|
1156
1159
|
{"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
|
1157
1160
|
)
|
File without changes
|
File without changes
|
File without changes
|