airbyte-cdk: airbyte_cdk-0.52.7-py3-none-any.whl → airbyte_cdk-0.52.8-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27)
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/message/repository.py +0 -6
  4. airbyte_cdk/sources/source.py +14 -13
  5. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  6. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  7. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  8. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  9. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  10. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  11. airbyte_cdk/sources/streams/core.py +71 -1
  12. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +27 -25
  14. unit_tests/sources/message/test_repository.py +7 -20
  15. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  16. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  17. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  18. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  19. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  20. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  21. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  22. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  23. unit_tests/sources/streams/test_stream_read.py +3 -1
  24. unit_tests/sources/test_abstract_source.py +12 -9
  25. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  26. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +0 -0
  27. {airbyte_cdk-0.52.7.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
11
11
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
12
12
  from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
13
13
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
14
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
15
+ test_incremental_stream_with_slice_boundaries,
16
+ test_incremental_stream_without_slice_boundaries,
14
17
  test_stream_facade_multiple_streams,
15
18
  test_stream_facade_raises_exception,
16
19
  test_stream_facade_single_stream,
@@ -43,6 +46,9 @@ scenarios = [
43
46
  test_stream_facade_single_stream_with_multiple_slices,
44
47
  test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
45
48
  test_stream_facade_raises_exception,
49
+ test_incremental_stream_with_slice_boundaries,
50
+ test_incremental_stream_without_slice_boundaries,
51
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
46
52
  ]
47
53
 
48
54
 
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
17
18
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
19
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
20
+ from airbyte_cdk.sources.streams.core import StreamData
19
21
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
22
  from airbyte_protocol.models import ConfiguredAirbyteStream
21
23
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
22
24
 
23
25
 
26
+ class LegacyStream(Stream):
27
+ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
28
+ return None
29
+
30
+ def read_records(
31
+ self,
32
+ sync_mode: SyncMode,
33
+ cursor_field: Optional[List[str]] = None,
34
+ stream_slice: Optional[Mapping[str, Any]] = None,
35
+ stream_state: Optional[Mapping[str, Any]] = None,
36
+ ) -> Iterable[StreamData]:
37
+ yield from []
38
+
39
+
24
40
  class ConcurrentCdkSource(AbstractSource):
25
41
  def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
26
42
  self._streams = streams
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
31
47
  return True, None
32
48
 
33
49
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
34
- return [StreamFacade(s) for s in self._streams]
50
+ return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
35
51
 
36
52
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
37
53
  return ConnectorSpecification(connectionSpecification={})
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
40
56
  return ConfiguredAirbyteCatalog(
41
57
  streams=[
42
58
  ConfiguredAirbyteStream(
43
- stream=StreamFacade(s).as_airbyte_stream(),
59
+ stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
44
60
  sync_mode=SyncMode.full_refresh,
45
61
  destination_sync_mode=DestinationSyncMode.overwrite,
46
62
  )
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
57
73
  def __init__(self, partitions: List[Partition]):
58
74
  self._partitions = partitions
59
75
 
60
- def generate(self, sync_mode: SyncMode) -> Iterable[Partition]:
76
+ def generate(self) -> Iterable[Partition]:
61
77
  yield from self._partitions
62
78
 
63
79
 
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
17
17
  StreamPartitionGenerator,
18
18
  )
19
19
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
20
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
20
21
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
21
22
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
23
+ from airbyte_cdk.sources.streams.core import Stream
22
24
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
23
25
 
26
+ _ANY_SYNC_MODE = SyncMode.full_refresh
27
+ _ANY_STATE = {"state_key": "state_value"}
28
+ _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
29
+
24
30
 
25
31
  @pytest.mark.parametrize(
26
32
  "stream_availability, expected_available, expected_message",
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
71
77
  stream_slices = [{"slice": 1}, {"slice": 2}]
72
78
  stream.stream_slices.return_value = stream_slices
73
79
 
74
- partition_generator = StreamPartitionGenerator(stream, message_repository)
80
+ partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
75
81
 
76
- partitions = list(partition_generator.generate(sync_mode))
82
+ partitions = list(partition_generator.generate())
77
83
  slices = [partition.to_slice() for partition in partitions]
78
84
  assert slices == stream_slices
85
+ stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
79
86
 
80
87
 
81
88
  @pytest.mark.parametrize(
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
95
102
  stream.transformer = transformer
96
103
  message_repository = InMemoryMessageRepository()
97
104
  _slice = None
98
- partition = StreamPartition(stream, _slice, message_repository)
105
+ sync_mode = SyncMode.full_refresh
106
+ cursor_field = None
107
+ state = None
108
+ partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
99
109
 
100
110
  a_log_message = AirbyteMessage(
101
111
  type=MessageType.LOG,
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
128
138
 
129
139
  message_repository = InMemoryMessageRepository()
130
140
  _slice = None
131
- partition = StreamPartition(stream, _slice, message_repository)
141
+
142
+ partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
132
143
 
133
144
  stream.read_records.side_effect = Exception()
134
145
 
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
148
159
  def test_stream_partition_hash(_slice, expected_hash):
149
160
  stream = Mock()
150
161
  stream.name = "stream"
151
- partition = StreamPartition(stream, _slice, Mock())
162
+ partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
152
163
 
153
164
  _hash = partition.__hash__()
154
165
  assert _hash == expected_hash
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
163
174
  json_schema={"type": "object"},
164
175
  supported_sync_modes=[SyncMode.full_refresh],
165
176
  )
166
- self._facade = StreamFacade(self._abstract_stream)
177
+ self._legacy_stream = Mock(spec=Stream)
178
+ self._cursor = Mock(spec=Cursor)
179
+ self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
167
180
  self._logger = Mock()
168
181
  self._source = Mock()
169
182
  self._max_workers = 10
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
191
204
  assert self._facade.get_json_schema() == json_schema
192
205
  self._abstract_stream.get_json_schema.assert_called_once_with()
193
206
 
194
- def test_supports_incremental_is_false(self):
195
- assert self._facade.supports_incremental is False
207
+ def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
208
+ assert (
209
+ StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
210
+ == self._legacy_stream.supports_incremental
211
+ )
212
+
213
+ def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
214
+ assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
196
215
 
197
216
  def test_check_availability_is_delegated_to_wrapped_stream(self):
198
217
  availability = StreamAvailable()
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
219
238
  assert actual_stream_data == expected_stream_data
220
239
 
221
240
  def test_read_records_incremental(self):
222
- with self.assertRaises(NotImplementedError):
223
- list(self._facade.read_records(SyncMode.incremental, None, None, None))
241
+ expected_stream_data = [{"data": 1}, {"data": 2}]
242
+ records = [Record(data) for data in expected_stream_data]
243
+ self._abstract_stream.read.return_value = records
244
+
245
+ actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
246
+
247
+ assert actual_stream_data == expected_stream_data
224
248
 
225
249
  def test_create_from_stream_stream(self):
226
250
  stream = Mock()
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
228
252
  stream.primary_key = "id"
229
253
  stream.cursor_field = "cursor"
230
254
 
231
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
255
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
232
256
 
233
257
  assert facade.name == "stream"
234
258
  assert facade.cursor_field == "cursor"
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
240
264
  stream.primary_key = None
241
265
  stream.cursor_field = []
242
266
 
243
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
267
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
244
268
  facade._abstract_stream._primary_key is None
245
269
 
246
270
  def test_create_from_stream_with_composite_primary_key(self):
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
249
273
  stream.primary_key = ["id", "name"]
250
274
  stream.cursor_field = []
251
275
 
252
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
276
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
253
277
  facade._abstract_stream._primary_key == ["id", "name"]
254
278
 
255
279
  def test_create_from_stream_with_empty_list_cursor(self):
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
257
281
  stream.primary_key = "id"
258
282
  stream.cursor_field = []
259
283
 
260
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
284
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
261
285
 
262
286
  assert facade.cursor_field == []
263
287
 
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
267
291
  stream.primary_key = [["field", "id"]]
268
292
 
269
293
  with self.assertRaises(ValueError):
270
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
294
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
271
295
 
272
296
  def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
273
297
  stream = Mock()
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
275
299
  stream.primary_key = 123
276
300
 
277
301
  with self.assertRaises(ValueError):
278
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
302
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
279
303
 
280
304
  def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
281
305
  stream = Mock()
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
284
308
  stream.cursor_field = ["field", "cursor"]
285
309
 
286
310
  with self.assertRaises(ValueError):
287
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
311
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
288
312
 
289
313
  def test_create_from_stream_with_cursor_field_as_list(self):
290
314
  stream = Mock()
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
292
316
  stream.primary_key = "id"
293
317
  stream.cursor_field = ["cursor"]
294
318
 
295
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
319
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
296
320
  assert facade.cursor_field == "cursor"
297
321
 
298
322
  def test_create_from_stream_none_message_repository(self):
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
302
326
  self._source.message_repository = None
303
327
 
304
328
  with self.assertRaises(ValueError):
305
- StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
329
+ StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
306
330
 
307
331
  def test_get_error_display_message_no_display_message(self):
308
332
  self._stream.get_error_display_message.return_value = "display_message"
309
333
 
310
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
334
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
311
335
 
312
336
  expected_display_message = None
313
337
  e = Exception()
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
319
343
  def test_get_error_display_message_with_display_message(self):
320
344
  self._stream.get_error_display_message.return_value = "display_message"
321
345
 
322
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
346
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
323
347
 
324
348
  expected_display_message = "display_message"
325
349
  e = ExceptionWithDisplayMessage("display_message")
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
338
362
  )
339
363
  def test_get_error_display_message(exception, expected_display_message):
340
364
  stream = Mock()
341
- facade = StreamFacade(stream)
365
+ legacy_stream = Mock()
366
+ cursor = Mock(spec=Cursor)
367
+ facade = StreamFacade(stream, legacy_stream, cursor)
342
368
 
343
369
  display_message = facade.get_error_display_message(exception)
344
370
 
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
- partitions = [StreamPartition(stream, s, message_repository) for s in slices]
25
- stream.generate.return_value = iter(partitions)
26
-
27
24
  sync_mode = SyncMode.full_refresh
25
+ cursor_field = None
26
+ state = None
27
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
+ stream.generate.return_value = iter(partitions)
28
29
 
29
- partition_generator.generate_partitions(stream, sync_mode)
30
+ partition_generator.generate_partitions(stream)
30
31
 
31
32
  actual_partitions = []
32
33
  while partition := queue.get(False):
@@ -0,0 +1,130 @@
1
+ from typing import Any, Mapping, Optional
2
+ from unittest import TestCase
3
+ from unittest.mock import Mock
4
+
5
+ import pytest
6
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
7
+ from airbyte_cdk.sources.message import MessageRepository
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import Comparable, ConcurrentCursor, CursorField
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
10
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
11
+
12
+ _A_STREAM_NAME = "a stream name"
13
+ _A_STREAM_NAMESPACE = "a stream namespace"
14
+ _ANY_STATE = None
15
+ _A_CURSOR_FIELD_KEY = "a_cursor_field_key"
16
+ _NO_PARTITION_IDENTIFIER = None
17
+ _NO_SLICE = None
18
+ _NO_SLICE_BOUNDARIES = None
19
+ _LOWER_SLICE_BOUNDARY_FIELD = "lower_boundary"
20
+ _UPPER_SLICE_BOUNDARY_FIELD = "upper_boundary"
21
+ _SLICE_BOUNDARY_FIELDS = (_LOWER_SLICE_BOUNDARY_FIELD, _UPPER_SLICE_BOUNDARY_FIELD)
22
+ _A_VERY_HIGH_CURSOR_VALUE = 1000000000
23
+
24
+
25
+ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
26
+ partition = Mock(spec=Partition)
27
+ partition.to_slice.return_value = _slice
28
+ return partition
29
+
30
+
31
+ def _record(cursor_value: Comparable) -> Record:
32
+ return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
33
+
34
+
35
+ class ConcurrentCursorTest(TestCase):
36
+ def setUp(self) -> None:
37
+ self._message_repository = Mock(spec=MessageRepository)
38
+ self._state_manager = Mock(spec=ConnectorStateManager)
39
+
40
+ def _cursor_with_slice_boundary_fields(self) -> ConcurrentCursor:
41
+ return ConcurrentCursor(
42
+ _A_STREAM_NAME,
43
+ _A_STREAM_NAMESPACE,
44
+ _ANY_STATE,
45
+ self._message_repository,
46
+ self._state_manager,
47
+ CursorField(_A_CURSOR_FIELD_KEY),
48
+ _SLICE_BOUNDARY_FIELDS,
49
+ )
50
+
51
+ def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
52
+ return ConcurrentCursor(
53
+ _A_STREAM_NAME,
54
+ _A_STREAM_NAMESPACE,
55
+ _ANY_STATE,
56
+ self._message_repository,
57
+ self._state_manager,
58
+ CursorField(_A_CURSOR_FIELD_KEY),
59
+ None,
60
+ )
61
+
62
+ def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
63
+ self._cursor_with_slice_boundary_fields().close_partition(
64
+ _partition(
65
+ {_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
66
+ )
67
+ )
68
+
69
+ self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
70
+ self._state_manager.update_state_for_stream.assert_called_once_with(
71
+ _A_STREAM_NAME,
72
+ _A_STREAM_NAMESPACE,
73
+ {
74
+ "slices": [
75
+ {
76
+ "start": 12,
77
+ "end": 30,
78
+ },
79
+ ]
80
+ },
81
+ )
82
+
83
+ def test_given_boundary_fields_and_record_observed_when_close_partition_then_ignore_records(self) -> None:
84
+ cursor = self._cursor_with_slice_boundary_fields()
85
+ cursor.observe(_record(_A_VERY_HIGH_CURSOR_VALUE))
86
+
87
+ cursor.close_partition(_partition({_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30}))
88
+
89
+ assert self._state_manager.update_state_for_stream.call_args_list[0].args[2]["slices"][0]["end"] != _A_VERY_HIGH_CURSOR_VALUE
90
+
91
+ def test_given_no_boundary_fields_when_close_partition_then_emit_state(self) -> None:
92
+ cursor = self._cursor_without_slice_boundary_fields()
93
+ cursor.observe(_record(10))
94
+ cursor.close_partition(_partition(_NO_SLICE))
95
+
96
+ self._state_manager.update_state_for_stream.assert_called_once_with(
97
+ _A_STREAM_NAME,
98
+ _A_STREAM_NAMESPACE,
99
+ {
100
+ "slices": [
101
+ {
102
+ "start": 0,
103
+ "end": 10,
104
+ },
105
+ ]
106
+ },
107
+ )
108
+
109
+ def test_given_no_boundary_fields_when_close_multiple_partitions_then_raise_exception(self) -> None:
110
+ cursor = self._cursor_without_slice_boundary_fields()
111
+ cursor.observe(_record(10))
112
+ cursor.close_partition(_partition(_NO_SLICE))
113
+
114
+ with pytest.raises(ValueError):
115
+ cursor.close_partition(_partition(_NO_SLICE))
116
+
117
+ def test_given_no_records_observed_when_close_partition_then_do_not_emit_state(self) -> None:
118
+ cursor = self._cursor_without_slice_boundary_fields()
119
+ cursor.close_partition(_partition(_NO_SLICE))
120
+ assert self._message_repository.emit_message.call_count == 0
121
+
122
+ def test_given_slice_boundaries_and_no_slice_when_close_partition_then_raise_error(self) -> None:
123
+ cursor = self._cursor_with_slice_boundary_fields()
124
+ with pytest.raises(KeyError):
125
+ cursor.close_partition(_partition(_NO_SLICE))
126
+
127
+ def test_given_slice_boundaries_not_matching_slice_when_close_partition_then_raise_error(self) -> None:
128
+ cursor = self._cursor_with_slice_boundary_fields()
129
+ with pytest.raises(KeyError):
130
+ cursor.close_partition(_partition({"not_matching_key": "value"}))
@@ -5,9 +5,9 @@
5
5
  import unittest
6
6
  from unittest.mock import Mock, call
7
7
 
8
- import pytest
9
8
  from airbyte_cdk.models import AirbyteStream, SyncMode
10
9
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE
10
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
11
11
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
12
12
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
13
13
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
@@ -25,6 +25,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
25
25
  self._slice_logger = Mock()
26
26
  self._logger = Mock()
27
27
  self._message_repository = Mock()
28
+ self._cursor = Mock(spec=Cursor)
28
29
  self._stream = ThreadBasedConcurrentStream(
29
30
  self._partition_generator,
30
31
  self._max_workers,
@@ -39,6 +40,7 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
39
40
  1,
40
41
  2,
41
42
  0,
43
+ cursor=self._cursor,
42
44
  )
43
45
 
44
46
  def test_get_json_schema(self):
@@ -76,17 +78,20 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
76
78
  with self.assertRaises(Exception):
77
79
  self._stream._check_for_errors(futures)
78
80
 
79
- def test_read_raises_an_exception_if_a_partition_raises_an_exception(self):
81
+ def test_read_observe_records_and_close_partition(self):
80
82
  partition = Mock(spec=Partition)
81
- partition.read.side_effect = RuntimeError("error")
83
+ expected_records = [Record({"id": 1}), Record({"id": "2"})]
84
+ partition.read.return_value = expected_records
85
+ partition.to_slice.return_value = {"slice": "slice"}
86
+ self._slice_logger.should_log_slice_message.return_value = False
87
+
82
88
  self._partition_generator.generate.return_value = [partition]
83
- with pytest.raises(RuntimeError):
84
- list(self._stream.read())
89
+ actual_records = list(self._stream.read())
90
+
91
+ assert expected_records == actual_records
85
92
 
86
- def test_read_raises_an_exception_if_partition_generator_raises_an_exception(self):
87
- self._partition_generator.generate.side_effect = RuntimeError("error")
88
- with pytest.raises(RuntimeError):
89
- list(self._stream.read())
93
+ self._cursor.observe.has_calls([call(record) for record in expected_records])
94
+ self._cursor.close_partition.assert_called_once_with(partition)
90
95
 
91
96
  def test_read_no_slice_message(self):
92
97
  partition = Mock(spec=Partition)
@@ -218,7 +223,6 @@ class ThreadBasedConcurrentStreamTest(unittest.TestCase):
218
223
  assert expected_airbyte_stream == airbyte_stream
219
224
 
220
225
  def test_as_airbyte_stream_with_a_cursor(self):
221
-
222
226
  json_schema = {
223
227
  "type": "object",
224
228
  "properties": {
@@ -12,6 +12,7 @@ from airbyte_cdk.models import Type as MessageType
12
12
  from airbyte_cdk.sources.message import InMemoryMessageRepository
13
13
  from airbyte_cdk.sources.streams import Stream
14
14
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.core import StreamData
16
17
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
17
18
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
@@ -19,6 +20,7 @@ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger
19
20
  _A_CURSOR_FIELD = ["NESTED", "CURSOR"]
20
21
  _DEFAULT_INTERNAL_CONFIG = InternalConfig()
21
22
  _STREAM_NAME = "STREAM"
23
+ _NO_STATE = None
22
24
 
23
25
 
24
26
  class _MockStream(Stream):
@@ -57,7 +59,7 @@ def _concurrent_stream(slice_to_partition_mapping, slice_logger, logger, message
57
59
  source = Mock()
58
60
  source._slice_logger = slice_logger
59
61
  source.message_repository = message_repository
60
- stream = StreamFacade.create_from_stream(stream, source, logger, 1)
62
+ stream = StreamFacade.create_from_stream(stream, source, logger, 1, _NO_STATE, NoopCursor())
61
63
  stream.logger.setLevel(logger.level)
62
64
  return stream
63
65
 
@@ -996,10 +996,11 @@ class TestIncrementalRead:
996
996
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
997
997
  # stream 1 slice 2
998
998
  _as_record("s1", stream_output[0]),
999
- _as_record("s1", stream_output[1]),
1000
999
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1000
+ _as_record("s1", stream_output[1]),
1001
1001
  _as_record("s1", stream_output[2]),
1002
1002
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1003
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1003
1004
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1004
1005
  # stream 2 slice 1
1005
1006
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1011,17 +1012,18 @@ class TestIncrementalRead:
1011
1012
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1012
1013
  # stream 2 slice 2
1013
1014
  _as_record("s2", stream_output[0]),
1014
- _as_record("s2", stream_output[1]),
1015
1015
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1016
+ _as_record("s2", stream_output[1]),
1016
1017
  _as_record("s2", stream_output[2]),
1017
1018
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1019
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1018
1020
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1019
1021
  ]
1020
1022
  )
1021
1023
 
1022
1024
  messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
1023
1025
 
1024
- assert expected == messages
1026
+ assert messages == expected
1025
1027
 
1026
1028
  @pytest.mark.parametrize(
1027
1029
  "per_stream_enabled",
@@ -1108,11 +1110,12 @@ class TestIncrementalRead:
1108
1110
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1109
1111
  # stream 1 slice 2
1110
1112
  stream_data_to_airbyte_message("s1", stream_output[0]),
1113
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1111
1114
  stream_data_to_airbyte_message("s1", stream_output[1]),
1112
1115
  stream_data_to_airbyte_message("s1", stream_output[2]),
1113
- _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1114
1116
  stream_data_to_airbyte_message("s1", stream_output[3]),
1115
1117
  _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1118
+ _as_state({"s1": state}, "s1", state) if per_stream_enabled else _as_state({"s1": state}),
1116
1119
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1117
1120
  # stream 2 slice 1
1118
1121
  _as_stream_status("s2", AirbyteStreamStatus.STARTED),
@@ -1125,33 +1128,33 @@ class TestIncrementalRead:
1125
1128
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1126
1129
  # stream 2 slice 2
1127
1130
  stream_data_to_airbyte_message("s2", stream_output[0]),
1131
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1128
1132
  stream_data_to_airbyte_message("s2", stream_output[1]),
1129
1133
  stream_data_to_airbyte_message("s2", stream_output[2]),
1130
- _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1131
1134
  stream_data_to_airbyte_message("s2", stream_output[3]),
1132
1135
  _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1136
+ _as_state({"s1": state, "s2": state}, "s2", state) if per_stream_enabled else _as_state({"s1": state, "s2": state}),
1133
1137
  _as_stream_status("s2", AirbyteStreamStatus.COMPLETE),
1134
1138
  ]
1135
1139
  )
1136
1140
 
1137
1141
  messages = _fix_emitted_at(list(src.read(logger, {}, catalog, state=input_state)))
1138
1142
 
1139
- assert expected == messages
1143
+ assert messages == expected
1140
1144
 
1141
1145
 
1142
1146
  def test_checkpoint_state_from_stream_instance():
1143
1147
  teams_stream = MockStreamOverridesStateMethod()
1144
1148
  managers_stream = StreamNoStateMethod()
1145
- src = MockSource(streams=[teams_stream, managers_stream])
1146
1149
  state_manager = ConnectorStateManager({"teams": teams_stream, "managers": managers_stream}, [])
1147
1150
 
1148
1151
  # The stream_state passed to checkpoint_state() should be ignored since stream implements state function
1149
1152
  teams_stream.state = {"updated_at": "2022-09-11"}
1150
- actual_message = src._checkpoint_state(teams_stream, {"ignored": "state"}, state_manager)
1153
+ actual_message = teams_stream._checkpoint_state({"ignored": "state"}, state_manager, True)
1151
1154
  assert actual_message == _as_state({"teams": {"updated_at": "2022-09-11"}}, "teams", {"updated_at": "2022-09-11"})
1152
1155
 
1153
1156
  # The stream_state passed to checkpoint_state() should be used since the stream does not implement state function
1154
- actual_message = src._checkpoint_state(managers_stream, {"updated": "expected_here"}, state_manager)
1157
+ actual_message = managers_stream._checkpoint_state({"updated": "expected_here"}, state_manager, True)
1155
1158
  assert actual_message == _as_state(
1156
1159
  {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1157
1160
  )