airbyte-cdk 0.52.7__py3-none-any.whl → 0.52.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/message/repository.py +0 -6
  4. airbyte_cdk/sources/source.py +14 -13
  5. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  6. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  7. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  8. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  9. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  10. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  11. airbyte_cdk/sources/streams/core.py +71 -1
  12. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
  14. unit_tests/sources/message/test_repository.py +7 -20
  15. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  16. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  17. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  18. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  19. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  20. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  21. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  22. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  23. unit_tests/sources/streams/test_stream_read.py +3 -1
  24. unit_tests/sources/test_abstract_source.py +12 -9
  25. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  26. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
11
11
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
12
12
  from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
13
13
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
14
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
15
+ test_incremental_stream_with_slice_boundaries,
16
+ test_incremental_stream_without_slice_boundaries,
14
17
  test_stream_facade_multiple_streams,
15
18
  test_stream_facade_raises_exception,
16
19
  test_stream_facade_single_stream,
@@ -43,6 +46,9 @@ scenarios = [
43
46
  test_stream_facade_single_stream_with_multiple_slices,
44
47
  test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
45
48
  test_stream_facade_raises_exception,
49
+ test_incremental_stream_with_slice_boundaries,
50
+ test_incremental_stream_without_slice_boundaries,
51
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
46
52
  ]
47
53
 
48
54
 
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
17
18
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
19
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
20
+ from airbyte_cdk.sources.streams.core import StreamData
19
21
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
22
  from airbyte_protocol.models import ConfiguredAirbyteStream
21
23
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
22
24
 
23
25
 
26
+ class LegacyStream(Stream):
27
+ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
28
+ return None
29
+
30
+ def read_records(
31
+ self,
32
+ sync_mode: SyncMode,
33
+ cursor_field: Optional[List[str]] = None,
34
+ stream_slice: Optional[Mapping[str, Any]] = None,
35
+ stream_state: Optional[Mapping[str, Any]] = None,
36
+ ) -> Iterable[StreamData]:
37
+ yield from []
38
+
39
+
24
40
  class ConcurrentCdkSource(AbstractSource):
25
41
  def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
26
42
  self._streams = streams
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
31
47
  return True, None
32
48
 
33
49
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
34
- return [StreamFacade(s) for s in self._streams]
50
+ return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
35
51
 
36
52
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
37
53
  return ConnectorSpecification(connectionSpecification={})
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
40
56
  return ConfiguredAirbyteCatalog(
41
57
  streams=[
42
58
  ConfiguredAirbyteStream(
43
- stream=StreamFacade(s).as_airbyte_stream(),
59
+ stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
44
60
  sync_mode=SyncMode.full_refresh,
45
61
  destination_sync_mode=DestinationSyncMode.overwrite,
46
62
  )
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
57
73
  def __init__(self, partitions: List[Partition]):
58
74
  self._partitions = partitions
59
75
 
60
- def generate(self, sync_mode: SyncMode) -> Iterable[Partition]:
76
+ def generate(self) -> Iterable[Partition]:
61
77
  yield from self._partitions
62
78
 
63
79
 
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
17
17
  StreamPartitionGenerator,
18
18
  )
19
19
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
20
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
20
21
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
21
22
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
23
+ from airbyte_cdk.sources.streams.core import Stream
22
24
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
23
25
 
26
+ _ANY_SYNC_MODE = SyncMode.full_refresh
27
+ _ANY_STATE = {"state_key": "state_value"}
28
+ _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
29
+
24
30
 
25
31
  @pytest.mark.parametrize(
26
32
  "stream_availability, expected_available, expected_message",
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
71
77
  stream_slices = [{"slice": 1}, {"slice": 2}]
72
78
  stream.stream_slices.return_value = stream_slices
73
79
 
74
- partition_generator = StreamPartitionGenerator(stream, message_repository)
80
+ partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
75
81
 
76
- partitions = list(partition_generator.generate(sync_mode))
82
+ partitions = list(partition_generator.generate())
77
83
  slices = [partition.to_slice() for partition in partitions]
78
84
  assert slices == stream_slices
85
+ stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
79
86
 
80
87
 
81
88
  @pytest.mark.parametrize(
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
95
102
  stream.transformer = transformer
96
103
  message_repository = InMemoryMessageRepository()
97
104
  _slice = None
98
- partition = StreamPartition(stream, _slice, message_repository)
105
+ sync_mode = SyncMode.full_refresh
106
+ cursor_field = None
107
+ state = None
108
+ partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
99
109
 
100
110
  a_log_message = AirbyteMessage(
101
111
  type=MessageType.LOG,
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
128
138
 
129
139
  message_repository = InMemoryMessageRepository()
130
140
  _slice = None
131
- partition = StreamPartition(stream, _slice, message_repository)
141
+
142
+ partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
132
143
 
133
144
  stream.read_records.side_effect = Exception()
134
145
 
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
148
159
  def test_stream_partition_hash(_slice, expected_hash):
149
160
  stream = Mock()
150
161
  stream.name = "stream"
151
- partition = StreamPartition(stream, _slice, Mock())
162
+ partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
152
163
 
153
164
  _hash = partition.__hash__()
154
165
  assert _hash == expected_hash
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
163
174
  json_schema={"type": "object"},
164
175
  supported_sync_modes=[SyncMode.full_refresh],
165
176
  )
166
- self._facade = StreamFacade(self._abstract_stream)
177
+ self._legacy_stream = Mock(spec=Stream)
178
+ self._cursor = Mock(spec=Cursor)
179
+ self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
167
180
  self._logger = Mock()
168
181
  self._source = Mock()
169
182
  self._max_workers = 10
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
191
204
  assert self._facade.get_json_schema() == json_schema
192
205
  self._abstract_stream.get_json_schema.assert_called_once_with()
193
206
 
194
- def test_supports_incremental_is_false(self):
195
- assert self._facade.supports_incremental is False
207
+ def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
208
+ assert (
209
+ StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
210
+ == self._legacy_stream.supports_incremental
211
+ )
212
+
213
+ def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
214
+ assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
196
215
 
197
216
  def test_check_availability_is_delegated_to_wrapped_stream(self):
198
217
  availability = StreamAvailable()
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
219
238
  assert actual_stream_data == expected_stream_data
220
239
 
221
240
  def test_read_records_incremental(self):
222
- with self.assertRaises(NotImplementedError):
223
- list(self._facade.read_records(SyncMode.incremental, None, None, None))
241
+ expected_stream_data = [{"data": 1}, {"data": 2}]
242
+ records = [Record(data) for data in expected_stream_data]
243
+ self._abstract_stream.read.return_value = records
244
+
245
+ actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
246
+
247
+ assert actual_stream_data == expected_stream_data
224
248
 
225
249
  def test_create_from_stream_stream(self):
226
250
  stream = Mock()
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
228
252
  stream.primary_key = "id"
229
253
  stream.cursor_field = "cursor"
230
254
 
231
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
255
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
232
256
 
233
257
  assert facade.name == "stream"
234
258
  assert facade.cursor_field == "cursor"
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
240
264
  stream.primary_key = None
241
265
  stream.cursor_field = []
242
266
 
243
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
267
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
244
268
  facade._abstract_stream._primary_key is None
245
269
 
246
270
  def test_create_from_stream_with_composite_primary_key(self):
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
249
273
  stream.primary_key = ["id", "name"]
250
274
  stream.cursor_field = []
251
275
 
252
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
276
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
253
277
  facade._abstract_stream._primary_key == ["id", "name"]
254
278
 
255
279
  def test_create_from_stream_with_empty_list_cursor(self):
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
257
281
  stream.primary_key = "id"
258
282
  stream.cursor_field = []
259
283
 
260
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
284
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
261
285
 
262
286
  assert facade.cursor_field == []
263
287
 
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
267
291
  stream.primary_key = [["field", "id"]]
268
292
 
269
293
  with self.assertRaises(ValueError):
270
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
294
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
271
295
 
272
296
  def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
273
297
  stream = Mock()
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
275
299
  stream.primary_key = 123
276
300
 
277
301
  with self.assertRaises(ValueError):
278
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
302
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
279
303
 
280
304
  def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
281
305
  stream = Mock()
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
284
308
  stream.cursor_field = ["field", "cursor"]
285
309
 
286
310
  with self.assertRaises(ValueError):
287
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
311
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
288
312
 
289
313
  def test_create_from_stream_with_cursor_field_as_list(self):
290
314
  stream = Mock()
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
292
316
  stream.primary_key = "id"
293
317
  stream.cursor_field = ["cursor"]
294
318
 
295
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
319
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
296
320
  assert facade.cursor_field == "cursor"
297
321
 
298
322
  def test_create_from_stream_none_message_repository(self):
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
302
326
  self._source.message_repository = None
303
327
 
304
328
  with self.assertRaises(ValueError):
305
- StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
329
+ StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
306
330
 
307
331
  def test_get_error_display_message_no_display_message(self):
308
332
  self._stream.get_error_display_message.return_value = "display_message"
309
333
 
310
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
334
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
311
335
 
312
336
  expected_display_message = None
313
337
  e = Exception()
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
319
343
  def test_get_error_display_message_with_display_message(self):
320
344
  self._stream.get_error_display_message.return_value = "display_message"
321
345
 
322
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
346
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
323
347
 
324
348
  expected_display_message = "display_message"
325
349
  e = ExceptionWithDisplayMessage("display_message")
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
338
362
  )
339
363
  def test_get_error_display_message(exception, expected_display_message):
340
364
  stream = Mock()
341
- facade = StreamFacade(stream)
365
+ legacy_stream = Mock()
366
+ cursor = Mock(spec=Cursor)
367
+ facade = StreamFacade(stream, legacy_stream, cursor)
342
368
 
343
369
  display_message = facade.get_error_display_message(exception)
344
370
 
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
- partitions = [StreamPartition(stream, s, message_repository) for s in slices]
25
- stream.generate.return_value = iter(partitions)
26
-
27
24
  sync_mode = SyncMode.full_refresh
25
+ cursor_field = None
26
+ state = None
27
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
+ stream.generate.return_value = iter(partitions)
28
29
 
29
- partition_generator.generate_partitions(stream, sync_mode)
30
+ partition_generator.generate_partitions(stream)
30
31
 
31
32
  actual_partitions = []
32
33
  while partition := queue.get(False):
@@ -0,0 +1,130 @@
1
+ from typing import Any, Mapping, Optional
2
+ from unittest import TestCase
3
+ from unittest.mock import Mock
4
+
5
+ import pytest
6
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
7
+ from airbyte_cdk.sources.message import MessageRepository
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import Comparable, ConcurrentCursor, CursorField
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
10
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
11
+
12
+ _A_STREAM_NAME = "a stream name"
13
+ _A_STREAM_NAMESPACE = "a stream namespace"
14
+ _ANY_STATE = None
15
+ _A_CURSOR_FIELD_KEY = "a_cursor_field_key"
16
+ _NO_PARTITION_IDENTIFIER = None
17
+ _NO_SLICE = None
18
+ _NO_SLICE_BOUNDARIES = None
19
+ _LOWER_SLICE_BOUNDARY_FIELD = "lower_boundary"
20
+ _UPPER_SLICE_BOUNDARY_FIELD = "upper_boundary"
21
+ _SLICE_BOUNDARY_FIELDS = (_LOWER_SLICE_BOUNDARY_FIELD, _UPPER_SLICE_BOUNDARY_FIELD)
22
+ _A_VERY_HIGH_CURSOR_VALUE = 1000000000
23
+
24
+
25
+ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
26
+ partition = Mock(spec=Partition)
27
+ partition.to_slice.return_value = _slice
28
+ return partition
29
+
30
+
31
+ def _record(cursor_value: Comparable) -> Record:
32
+ return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
33
+
34
+
35
+ class ConcurrentCursorTest(TestCase):
36
+ def setUp(self) -> None:
37
+ self._message_repository = Mock(spec=MessageRepository)
38
+ self._state_manager = Mock(spec=ConnectorStateManager)
39
+
40
+ def _cursor_with_slice_boundary_fields(self) -> ConcurrentCursor:
41
+ return ConcurrentCursor(
42
+ _A_STREAM_NAME,
43
+ _A_STREAM_NAMESPACE,
44
+ _ANY_STATE,
45
+ self._message_repository,
46
+ self._state_manager,
47
+ CursorField(_A_CURSOR_FIELD_KEY),
48
+ _SLICE_BOUNDARY_FIELDS,
49
+ )
50
+
51
+ def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
52
+ return ConcurrentCursor(
53
+ _A_STREAM_NAME,
54
+ _A_STREAM_NAMESPACE,
55
+ _ANY_STATE,
56
+ self._message_repository,
57
+ self._state_manager,
58
+ CursorField(_A_CURSOR_FIELD_KEY),
59
+ None,
60
+ )
61
+
62
+ def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
63
+ self._cursor_with_slice_boundary_fields().close_partition(
64
+ _partition(
65
+ {_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
66
+ )
67
+ )
68
+
69
+ self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
70
+ self._state_manager.update_state_for_stream.assert_called_once_with(
71
+ _A_STREAM_NAME,
72
+ _A_STREAM_NAMESPACE,
73
+ {
74
+ "slices": [
75
+ {
76
+ "start": 12,
77
+ "end": 30,
78
+ },
79
+ ]
80
+ },
81
+ )
82
+
83
+ def test_given_boundary_fields_and_record_observed_when_close_partition_then_ignore_records(self) -> None:
84
+ cursor = self._cursor_with_slice_boundary_fields()
85
+ cursor.observe(_record(_A_VERY_HIGH_CURSOR_VALUE))
86
+
87
+ cursor.close_partition(_partition({_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30}))
88
+
89
+ assert self._state_manager.update_state_for_stream.call_args_list[0].args[2]["slices"][0]["end"] != _A_VERY_HIGH_CURSOR_VALUE
90
+
91
+ def test_given_no_boundary_fields_when_close_partition_then_emit_state(self) -> None:
92
+ cursor = self._cursor_without_slice_boundary_fields()
93
+ cursor.observe(_record(10))
94
+ cursor.close_partition(_partition(_NO_SLICE))
95
+
96
+ self._state_manager.update_state_for_stream.assert_called_once_with(
97
+ _A_STREAM_NAME,
98
+ _A_STREAM_NAMESPACE,
99
+ {
100
+ "slices": [
101
+ {
102
+ "start": 0,
103
+ "end": 10,
104
+ },
105
+ ]
106
+ },
107
+ )
108
+
109
+ def test_given_no_boundary_fields_when_close_multiple_partitions_then_raise_exception(self) -> None:
110
+ cursor = self._cursor_without_slice_boundary_fields()
111
+ cursor.observe(_record(10))
112
+ cursor.close_partition(_partition(_NO_SLICE))
113
+
114
+ with pytest.raises(ValueError):
115
+ cursor.close_partition(_partition(_NO_SLICE))
116
+
117
+ def test_given_no_records_observed_when_close_partition_then_do_not_emit_state(self) -> None:
118
+ cursor = self._cursor_without_slice_boundary_fields()
119
+ cursor.close_partition(_partition(_NO_SLICE))
120
+ assert self._message_repository.emit_message.call_count == 0
121
+
122
+ def test_given_slice_boundaries_and_no_slice_when_close_partition_then_raise_error(self) -> None:
123
+ cursor = self._cursor_with_slice_boundary_fields()
124
+ with pytest.raises(KeyError):
125
+ cursor.close_partition(_partition(_NO_SLICE))
126
+
127
+ def test_given_slice_boundaries_not_matching_slice_when_close_partition_then_raise_error(self) -> None:
128
+ cursor = self._cursor_with_slice_boundary_fields()
129
+ with pytest.raises(KeyError):
130
+ cursor.close_partition(_partition({"not_matching_key": "value"}))
@@ -5,9 +5,9 @@
5
5
  import unittest
6
6
  from unittest.mock import Mock, call
7
7
 
8
- import pytest
9
8
  from airbyte_cdk.models import AirbyteStream, SyncMode
10
9
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
10
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
11
11
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
12
12
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
13
13
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
@@ -25,6 +25,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
25
25
  self._slice_logger = Mock()
26
26
  self._logger = Mock()
27
27
  self._message_repository = Mock()
28
+ self._cursor = Mock(spec=Cursor)
28
29
  self._stream = ThreadBasedConcurrentStream(
29
30
  self._partition_generator,
30
31
  self._max_workers,
@@ -39,6 +40,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
39
40
  1,
40
41
  2,
41
42
  0,
43
+ cursor=self._cursor,
42
44
  )
43
45
 
44
46
  def test_get_json_schema(self):
@@ -76,17 +78,20 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
76
78
  with self.assertRaises(Exception):
77
79
  self._stream._check_for_errors(futures)
78
80
 
79
- def test_read_raises_an_exception_if_a_partition_raises_an_exception(self):
81
+ def test_read_observe_records_and_close_partition(self):
80
82
  partition = Mock(spec=Partition)
81
- partition.read.side_effect = RuntimeError("error")
83
+ expected_records = [Record({"id": 1}), Record({"id": "2"})]
84
+ partition.read.return_value = expected_records
85
+ partition.to_slice.return_value = {"slice": "slice"}
86
+ self._slice_logger.should_log_slice_message.return_value = False
87
+
82
88
  self._partition_generator.generate.return_value = [partition]
83
- with pytest.raises(RuntimeError):
84
- list(self._stream.read())
89
+ actual_records = list(self._stream.read())
90
+
91
+ assert expected_records == actual_records
85
92
 
86
- def test_read_raises_an_exception_if_partition_generator_raises_an_exception(self):
87
- self._partition_generator.generate.side_effect = RuntimeError("error")
88
- with pytest.raises(RuntimeError):
89
- list(self._stream.read())
93
+ self._cursor.observe.has_calls([call(record) for record in expected_records])
94
+ self._cursor.close_partition.assert_called_once_with(partition)
90
95
 
91
96
  def test_read_no_slice_message(self):
92
97
  partition = Mock(spec=Partition)
@@ -218,7 +223,6 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
218
223
  assert expected_airbyte_stream == airbyte_stream
219
224
 
220
225
  def test_as_airbyte_stream_with_a_cursor(self):
221
-
222
226
  json_schema = {
223
227
  "type": "object",
224
228
  "properties": {
@@ -12,6 +12,7 @@ from airbyte_cdk.models import Type as MessageType
12
12
  from airbyte_cdk.sources.message import InMemoryMessageRepository
13
13
  from airbyte_cdk.sources.streams import Stream
14
14
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.core import StreamData
16
17
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
17
18
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
@@ -19,6 +20,7 @@ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
19
20
  _A_CURSOR_FIELD = ["NESTED", "CURSOR"]
20
21
  _DEFAULT_INTERNAL_CONFIG = InternalConfig()
21
22
  _STREAM_NAME = "STREAM"
23
+ _NO_STATE = None
22
24
 
23
25
 
24
26
  class _MockStream(Stream):
@@ -57,7 +59,7 @@ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message
57
59
  source = Mock()
58
60
  source._slice_logger = slice_logger
59
61
  source.message_repository = message_repository
60
- stream = StreamFacade.create_from_stream(stream, source, logger, 1)
62
+ stream = StreamFacade.create_from_stream(stream, source, logger, 1, _NO_STATE, NoopCursor())
61
63
  stream.logger.setLevel(logger.level)
62
64
  return stream
63
65
 
@@ -996,10 +996,11 @@ class TestIncrementalRead:
996
996
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
997
997
  # stream 1 slice 2
998
998
  _as_record("s1", stream_output[0]),
999
- _as_record("s1", stream_output[1]),
1000
999
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1000
+ _as_record("s1", stream_output[1]),
1001
1001
  _as_record("s1", stream_output[2]),
1002
1002
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1003
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1003
1004
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1004
1005
  # stream 2 slice 1
1005
1006
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1011,17 +1012,18 @@ class TestIncrementalRead:
1011
1012
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1012
1013
  # stream 2 slice 2
1013
1014
  _as_record("s2", stream_output[0]),
1014
- _as_record("s2", stream_output[1]),
1015
1015
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1016
+ _as_record("s2", stream_output[1]),
1016
1017
  _as_record("s2", stream_output[2]),
1017
1018
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1019
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1018
1020
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1019
1021
  ]
1020
1022
  )
1021
1023
 
1022
1024
  messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
1023
1025
 
1024
- assert expected == messages
1026
+ assert messages == expected
1025
1027
 
1026
1028
  @pytest.mark.parametrize(
1027
1029
  "per_stream_enabled",
@@ -1108,11 +1110,12 @@ class TestIncrementalRead:
1108
1110
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1109
1111
  # stream 1 slice 2
1110
1112
  stream_data_to_airbyte_message("s1", stream_output[0]),
1113
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1111
1114
  stream_data_to_airbyte_message("s1", stream_output[1]),
1112
1115
  stream_data_to_airbyte_message("s1", stream_output[2]),
1113
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1114
1116
  stream_data_to_airbyte_message("s1", stream_output[3]),
1115
1117
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1118
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1116
1119
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1117
1120
  # stream 2 slice 1
1118
1121
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1125,33 +1128,33 @@ class TestIncrementalRead:
1125
1128
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1126
1129
  # stream 2 slice 2
1127
1130
  stream_data_to_airbyte_message("s2", stream_output[0]),
1131
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1128
1132
  stream_data_to_airbyte_message("s2", stream_output[1]),
1129
1133
  stream_data_to_airbyte_message("s2", stream_output[2]),
1130
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1131
1134
  stream_data_to_airbyte_message("s2", stream_output[3]),
1132
1135
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1136
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1133
1137
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1134
1138
  ]
1135
1139
  )
1136
1140
 
1137
1141
  messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
1138
1142
 
1139
- assert expected == messages
1143
+ assert messages == expected
1140
1144
 
1141
1145
 
1142
1146
  def test_checkpoint_state_from_stream_instance():
1143
1147
  teams_stream = MockStreamOverridesStateMethod()
1144
1148
  managers_stream = StreamNoStateMethod()
1145
- src = MockSource(streams=[teams_stream, managers_stream])
1146
1149
  state_manager = ConnectorStateManager({"teams": teams_stream, "managers": managers_stream}, [])
1147
1150
 
1148
1151
  # The stream_state passed to checkpoint_state() should be ignored since stream implements state function
1149
1152
  teams_stream.state = {"updated_at": "2022-09-11"}
1150
- actual_message = src._checkpoint_state(teams_stream, {"ignored": "state"}, state_manager)
1153
+ actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
1151
1154
  assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
1152
1155
 
1153
1156
  # The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
1154
- actual_message = src._checkpoint_state(managers_stream, {"updated": "expected_here"}, state_manager)
1157
+ actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
1155
1158
  assert actual_message == _as_state(
1156
1159
  {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1157
1160
  )