airbyte-cdk: airbyte_cdk-0.52.6-py3-none-any.whl → airbyte_cdk-0.52.8-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32)
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -1
  4. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +1 -2
  5. airbyte_cdk/sources/message/repository.py +0 -6
  6. airbyte_cdk/sources/source.py +14 -13
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  9. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  10. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  12. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  13. airbyte_cdk/sources/streams/core.py +71 -1
  14. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +3 -3
  15. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +32 -30
  16. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +1 -1
  17. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +5 -0
  18. unit_tests/sources/file_based/scenarios/csv_scenarios.py +1 -1
  19. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +16 -0
  20. unit_tests/sources/message/test_repository.py +7 -20
  21. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  22. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  23. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  24. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  25. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  26. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  27. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  28. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  29. unit_tests/sources/streams/test_stream_read.py +3 -1
  30. unit_tests/sources/test_abstract_source.py +12 -9
  31. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
6
6
 
7
7
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
8
8
  from airbyte_cdk.sources import AbstractSource
9
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
10
11
  from airbyte_cdk.sources.streams import Stream
11
12
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
13
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
12
14
  from airbyte_protocol.models import ConfiguredAirbyteStream
13
15
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
14
16
 
17
+ _NO_STATE = None
18
+
15
19
 
16
20
  class StreamFacadeSource(AbstractSource):
17
- def __init__(self, streams: List[Stream], max_workers: int):
21
+ def __init__(
22
+ self,
23
+ streams: List[Stream],
24
+ max_workers: int,
25
+ cursor_field: Optional[CursorField] = None,
26
+ cursor_boundaries: Optional[Tuple[str, str]] = None,
27
+ ):
18
28
  self._streams = streams
19
29
  self._max_workers = max_workers
30
+ self._message_repository = InMemoryMessageRepository()
31
+ self._cursor_field = cursor_field
32
+ self._cursor_boundaries = cursor_boundaries
20
33
 
21
34
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
22
35
  return True, None
23
36
 
24
37
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
25
- return [StreamFacade.create_from_stream(stream, self, stream.logger, self._max_workers) for stream in self._streams]
38
+ state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
39
+ return [
40
+ StreamFacade.create_from_stream(
41
+ stream,
42
+ self,
43
+ stream.logger,
44
+ self._max_workers,
45
+ _NO_STATE,
46
+ ConcurrentCursor(
47
+ stream.name,
48
+ stream.namespace,
49
+ _NO_STATE,
50
+ self.message_repository, # type: ignore # for this source specifically, we always return `InMemoryMessageRepository`
51
+ state_manager,
52
+ self._cursor_field,
53
+ self._cursor_boundaries,
54
+ )
55
+ if self._cursor_field
56
+ else NoopCursor(),
57
+ )
58
+ for stream in self._streams
59
+ ]
26
60
 
27
61
  @property
28
62
  def message_repository(self) -> Union[None, MessageRepository]:
29
- return InMemoryMessageRepository()
63
+ return self._message_repository
30
64
 
31
65
  def spec(self, logger: logging.Logger) -> ConnectorSpecification:
32
66
  return ConnectorSpecification(connectionSpecification={})
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
49
83
  self._source = None
50
84
  self._streams = []
51
85
  self._max_workers = 1
86
+ self._cursor_field = None
87
+ self._cursor_boundaries = None
52
88
 
53
89
  def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
54
90
  self._streams = streams
55
91
  return self
56
92
 
57
- def set_max_workers(self, max_workers: int):
93
+ def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
58
94
  self._max_workers = max_workers
59
95
  return self
60
96
 
97
+ def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
98
+ self._cursor_field = cursor_field
99
+ self._cursor_boundaries = cursor_boundaries
100
+ return self
101
+
61
102
  def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
62
- return StreamFacadeSource(self._streams, self._max_workers)
103
+ return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
@@ -1,26 +1,25 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
- from typing import Any, Iterable, List, Mapping, Optional, Union
4
+ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
5
5
 
6
6
  from airbyte_cdk.models import SyncMode
7
7
  from airbyte_cdk.sources.streams import Stream
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
8
9
  from airbyte_cdk.sources.streams.core import StreamData
9
- from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
10
+ from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
11
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
11
12
 
12
13
 
13
14
  class _MockStream(Stream):
14
15
  def __init__(
15
16
  self,
16
- slice_key,
17
- slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
17
+ slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
18
18
  name,
19
19
  json_schema,
20
20
  primary_key=None,
21
21
  ):
22
- self._slice_key = slice_key
23
- self._slice_values_to_records = slice_values_to_records_or_exception
22
+ self._slices_and_records_or_exception = slices_and_records_or_exception
24
23
  self._name = name
25
24
  self._json_schema = json_schema
26
25
  self._primary_key = primary_key
@@ -32,19 +31,12 @@ class _MockStream(Stream):
32
31
  stream_slice: Optional[Mapping[str, Any]] = None,
33
32
  stream_state: Optional[Mapping[str, Any]] = None,
34
33
  ) -> Iterable[StreamData]:
35
- for record_or_exception in self._get_record_or_exception_iterable(stream_slice):
36
- if isinstance(record_or_exception, Exception):
37
- raise record_or_exception
38
- else:
39
- yield record_or_exception
40
-
41
- def _get_record_or_exception_iterable(
42
- self, stream_slice: Optional[Mapping[str, Any]] = None
43
- ) -> Iterable[Union[Mapping[str, Any], Exception]]:
44
- if stream_slice is None:
45
- return self._slice_values_to_records[None]
46
- else:
47
- return self._slice_values_to_records[stream_slice[self._slice_key]]
34
+ for _slice, records_or_exception in self._slices_and_records_or_exception:
35
+ if stream_slice == _slice:
36
+ for item in records_or_exception:
37
+ if isinstance(item, Exception):
38
+ raise item
39
+ yield item
48
40
 
49
41
  @property
50
42
  def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -60,16 +52,16 @@ class _MockStream(Stream):
60
52
  def stream_slices(
61
53
  self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
62
54
  ) -> Iterable[Optional[Mapping[str, Any]]]:
63
- if self._slice_key:
64
- for slice_value in self._slice_values_to_records.keys():
65
- yield {self._slice_key: slice_value}
55
+ if self._slices_and_records_or_exception:
56
+ yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
66
57
  else:
67
58
  yield None
68
59
 
69
60
 
70
61
  _stream1 = _MockStream(
71
- None,
72
- {None: [{"id": "1"}, {"id": "2"}]},
62
+ [
63
+ (None, [{"id": "1"}, {"id": "2"}]),
64
+ ],
73
65
  "stream1",
74
66
  json_schema={
75
67
  "type": "object",
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
80
72
  )
81
73
 
82
74
  _stream_raising_exception = _MockStream(
83
- None,
84
- {None: [{"id": "1"}, ValueError("test exception")]},
75
+ [
76
+ (None, [{"id": "1"}, ValueError("test exception")]),
77
+ ],
85
78
  "stream1",
86
79
  json_schema={
87
80
  "type": "object",
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
92
85
  )
93
86
 
94
87
  _stream_with_primary_key = _MockStream(
95
- None,
96
- {None: [{"id": "1"}, {"id": "2"}]},
88
+ [
89
+ (None, [{"id": "1"}, {"id": "2"}]),
90
+ ],
97
91
  "stream1",
98
92
  json_schema={
99
93
  "type": "object",
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
105
99
  )
106
100
 
107
101
  _stream2 = _MockStream(
108
- None,
109
- {None: [{"id": "A"}, {"id": "B"}]},
102
+ [
103
+ (None, [{"id": "A"}, {"id": "B"}]),
104
+ ],
110
105
  "stream2",
111
106
  json_schema={
112
107
  "type": "object",
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
117
112
  )
118
113
 
119
114
  _stream_with_single_slice = _MockStream(
120
- "slice_key",
121
- {"s1": [{"id": "1"}, {"id": "2"}]},
115
+ [
116
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
117
+ ],
122
118
  "stream1",
123
119
  json_schema={
124
120
  "type": "object",
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
129
125
  )
130
126
 
131
127
  _stream_with_multiple_slices = _MockStream(
132
- "slice_key",
133
- {
134
- "s1": [{"id": "1"}, {"id": "2"}],
135
- "s2": [{"id": "3"}, {"id": "4"}],
136
- },
128
+ [
129
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
130
+ ({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
131
+ ],
137
132
  "stream1",
138
133
  json_schema={
139
134
  "type": "object",
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
384
379
  )
385
380
  .build()
386
381
  )
382
+
383
+
384
+ test_incremental_stream_with_slice_boundaries = (
385
+ TestScenarioBuilder()
386
+ .set_name("test_incremental_stream_with_slice_boundaries")
387
+ .set_config({})
388
+ .set_source_builder(
389
+ StreamFacadeSourceBuilder()
390
+ .set_streams(
391
+ [
392
+ _MockStream(
393
+ [
394
+ ({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
395
+ ({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
396
+ ],
397
+ "stream1",
398
+ json_schema={
399
+ "type": "object",
400
+ "properties": {
401
+ "id": {"type": ["null", "string"]},
402
+ },
403
+ },
404
+ )
405
+ ]
406
+ )
407
+ .set_incremental(CursorField("cursor_field"), ("from", "to"))
408
+ )
409
+ .set_expected_records(
410
+ [
411
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
412
+ {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
413
+ {"stream1": {"slices": [{"start": 0, "end": 1}]}},
414
+ {"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
415
+ {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
416
+ {"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
417
+ ]
418
+ )
419
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
420
+ .set_incremental_scenario_config(
421
+ IncrementalScenarioConfig(
422
+ input_state=[],
423
+ )
424
+ )
425
+ .build()
426
+ )
427
+
428
+
429
+ _NO_SLICE_BOUNDARIES = None
430
+ test_incremental_stream_without_slice_boundaries = (
431
+ TestScenarioBuilder()
432
+ .set_name("test_incremental_stream_without_slice_boundaries")
433
+ .set_config({})
434
+ .set_source_builder(
435
+ StreamFacadeSourceBuilder()
436
+ .set_streams(
437
+ [
438
+ _MockStream(
439
+ [
440
+ (None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
441
+ ],
442
+ "stream1",
443
+ json_schema={
444
+ "type": "object",
445
+ "properties": {
446
+ "id": {"type": ["null", "string"]},
447
+ },
448
+ },
449
+ )
450
+ ]
451
+ )
452
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
453
+ )
454
+ .set_expected_records(
455
+ [
456
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
457
+ {"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
458
+ {"stream1": {"slices": [{"start": 0, "end": 3}]}},
459
+ ]
460
+ )
461
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
462
+ .set_incremental_scenario_config(
463
+ IncrementalScenarioConfig(
464
+ input_state=[],
465
+ )
466
+ )
467
+ .build()
468
+ )
469
+
470
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
471
+ TestScenarioBuilder()
472
+ .set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
473
+ .set_config({})
474
+ .set_source_builder(
475
+ StreamFacadeSourceBuilder()
476
+ .set_streams(
477
+ [
478
+ _MockStream(
479
+ [
480
+ ({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
481
+ ({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
482
+ ],
483
+ "stream1",
484
+ json_schema={
485
+ "type": "object",
486
+ "properties": {
487
+ "id": {"type": ["null", "string"]},
488
+ },
489
+ },
490
+ )
491
+ ]
492
+ )
493
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
494
+ )
495
+ .set_expected_read_error(ValueError, "test exception")
496
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
497
+ .set_incremental_scenario_config(
498
+ IncrementalScenarioConfig(
499
+ input_state=[],
500
+ )
501
+ )
502
+ .build()
503
+ )
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
11
11
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
12
12
  from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
13
13
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
14
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
15
+ test_incremental_stream_with_slice_boundaries,
16
+ test_incremental_stream_without_slice_boundaries,
14
17
  test_stream_facade_multiple_streams,
15
18
  test_stream_facade_raises_exception,
16
19
  test_stream_facade_single_stream,
@@ -43,6 +46,9 @@ scenarios = [
43
46
  test_stream_facade_single_stream_with_multiple_slices,
44
47
  test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
45
48
  test_stream_facade_raises_exception,
49
+ test_incremental_stream_with_slice_boundaries,
50
+ test_incremental_stream_without_slice_boundaries,
51
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
46
52
  ]
47
53
 
48
54
 
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
17
18
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
19
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
20
+ from airbyte_cdk.sources.streams.core import StreamData
19
21
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
22
  from airbyte_protocol.models import ConfiguredAirbyteStream
21
23
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
22
24
 
23
25
 
26
+ class LegacyStream(Stream):
27
+ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
28
+ return None
29
+
30
+ def read_records(
31
+ self,
32
+ sync_mode: SyncMode,
33
+ cursor_field: Optional[List[str]] = None,
34
+ stream_slice: Optional[Mapping[str, Any]] = None,
35
+ stream_state: Optional[Mapping[str, Any]] = None,
36
+ ) -> Iterable[StreamData]:
37
+ yield from []
38
+
39
+
24
40
  class ConcurrentCdkSource(AbstractSource):
25
41
  def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
26
42
  self._streams = streams
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
31
47
  return True, None
32
48
 
33
49
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
34
- return [StreamFacade(s) for s in self._streams]
50
+ return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
35
51
 
36
52
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
37
53
  return ConnectorSpecification(connectionSpecification={})
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
40
56
  return ConfiguredAirbyteCatalog(
41
57
  streams=[
42
58
  ConfiguredAirbyteStream(
43
- stream=StreamFacade(s).as_airbyte_stream(),
59
+ stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
44
60
  sync_mode=SyncMode.full_refresh,
45
61
  destination_sync_mode=DestinationSyncMode.overwrite,
46
62
  )
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
57
73
  def __init__(self, partitions: List[Partition]):
58
74
  self._partitions = partitions
59
75
 
60
- def generate(self, sync_mode: SyncMode) -> Iterable[Partition]:
76
+ def generate(self) -> Iterable[Partition]:
61
77
  yield from self._partitions
62
78
 
63
79
 
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
17
17
  StreamPartitionGenerator,
18
18
  )
19
19
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
20
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
20
21
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
21
22
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
23
+ from airbyte_cdk.sources.streams.core import Stream
22
24
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
23
25
 
26
+ _ANY_SYNC_MODE = SyncMode.full_refresh
27
+ _ANY_STATE = {"state_key": "state_value"}
28
+ _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
29
+
24
30
 
25
31
  @pytest.mark.parametrize(
26
32
  "stream_availability, expected_available, expected_message",
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
71
77
  stream_slices = [{"slice": 1}, {"slice": 2}]
72
78
  stream.stream_slices.return_value = stream_slices
73
79
 
74
- partition_generator = StreamPartitionGenerator(stream, message_repository)
80
+ partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
75
81
 
76
- partitions = list(partition_generator.generate(sync_mode))
82
+ partitions = list(partition_generator.generate())
77
83
  slices = [partition.to_slice() for partition in partitions]
78
84
  assert slices == stream_slices
85
+ stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
79
86
 
80
87
 
81
88
  @pytest.mark.parametrize(
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
95
102
  stream.transformer = transformer
96
103
  message_repository = InMemoryMessageRepository()
97
104
  _slice = None
98
- partition = StreamPartition(stream, _slice, message_repository)
105
+ sync_mode = SyncMode.full_refresh
106
+ cursor_field = None
107
+ state = None
108
+ partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
99
109
 
100
110
  a_log_message = AirbyteMessage(
101
111
  type=MessageType.LOG,
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
128
138
 
129
139
  message_repository = InMemoryMessageRepository()
130
140
  _slice = None
131
- partition = StreamPartition(stream, _slice, message_repository)
141
+
142
+ partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
132
143
 
133
144
  stream.read_records.side_effect = Exception()
134
145
 
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
148
159
  def test_stream_partition_hash(_slice, expected_hash):
149
160
  stream = Mock()
150
161
  stream.name = "stream"
151
- partition = StreamPartition(stream, _slice, Mock())
162
+ partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
152
163
 
153
164
  _hash = partition.__hash__()
154
165
  assert _hash == expected_hash
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
163
174
  json_schema={"type": "object"},
164
175
  supported_sync_modes=[SyncMode.full_refresh],
165
176
  )
166
- self._facade = StreamFacade(self._abstract_stream)
177
+ self._legacy_stream = Mock(spec=Stream)
178
+ self._cursor = Mock(spec=Cursor)
179
+ self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
167
180
  self._logger = Mock()
168
181
  self._source = Mock()
169
182
  self._max_workers = 10
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
191
204
  assert self._facade.get_json_schema() == json_schema
192
205
  self._abstract_stream.get_json_schema.assert_called_once_with()
193
206
 
194
- def test_supports_incremental_is_false(self):
195
- assert self._facade.supports_incremental is False
207
+ def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
208
+ assert (
209
+ StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
210
+ == self._legacy_stream.supports_incremental
211
+ )
212
+
213
+ def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
214
+ assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
196
215
 
197
216
  def test_check_availability_is_delegated_to_wrapped_stream(self):
198
217
  availability = StreamAvailable()
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
219
238
  assert actual_stream_data == expected_stream_data
220
239
 
221
240
  def test_read_records_incremental(self):
222
- with self.assertRaises(NotImplementedError):
223
- list(self._facade.read_records(SyncMode.incremental, None, None, None))
241
+ expected_stream_data = [{"data": 1}, {"data": 2}]
242
+ records = [Record(data) for data in expected_stream_data]
243
+ self._abstract_stream.read.return_value = records
244
+
245
+ actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
246
+
247
+ assert actual_stream_data == expected_stream_data
224
248
 
225
249
  def test_create_from_stream_stream(self):
226
250
  stream = Mock()
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
228
252
  stream.primary_key = "id"
229
253
  stream.cursor_field = "cursor"
230
254
 
231
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
255
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
232
256
 
233
257
  assert facade.name == "stream"
234
258
  assert facade.cursor_field == "cursor"
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
240
264
  stream.primary_key = None
241
265
  stream.cursor_field = []
242
266
 
243
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
267
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
244
268
  facade._abstract_stream._primary_key is None
245
269
 
246
270
  def test_create_from_stream_with_composite_primary_key(self):
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
249
273
  stream.primary_key = ["id", "name"]
250
274
  stream.cursor_field = []
251
275
 
252
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
276
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
253
277
  facade._abstract_stream._primary_key == ["id", "name"]
254
278
 
255
279
  def test_create_from_stream_with_empty_list_cursor(self):
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
257
281
  stream.primary_key = "id"
258
282
  stream.cursor_field = []
259
283
 
260
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
284
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
261
285
 
262
286
  assert facade.cursor_field == []
263
287
 
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
267
291
  stream.primary_key = [["field", "id"]]
268
292
 
269
293
  with self.assertRaises(ValueError):
270
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
294
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
271
295
 
272
296
  def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
273
297
  stream = Mock()
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
275
299
  stream.primary_key = 123
276
300
 
277
301
  with self.assertRaises(ValueError):
278
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
302
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
279
303
 
280
304
  def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
281
305
  stream = Mock()
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
284
308
  stream.cursor_field = ["field", "cursor"]
285
309
 
286
310
  with self.assertRaises(ValueError):
287
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
311
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
288
312
 
289
313
  def test_create_from_stream_with_cursor_field_as_list(self):
290
314
  stream = Mock()
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
292
316
  stream.primary_key = "id"
293
317
  stream.cursor_field = ["cursor"]
294
318
 
295
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
319
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
296
320
  assert facade.cursor_field == "cursor"
297
321
 
298
322
  def test_create_from_stream_none_message_repository(self):
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
302
326
  self._source.message_repository = None
303
327
 
304
328
  with self.assertRaises(ValueError):
305
- StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
329
+ StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
306
330
 
307
331
  def test_get_error_display_message_no_display_message(self):
308
332
  self._stream.get_error_display_message.return_value = "display_message"
309
333
 
310
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
334
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
311
335
 
312
336
  expected_display_message = None
313
337
  e = Exception()
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
319
343
  def test_get_error_display_message_with_display_message(self):
320
344
  self._stream.get_error_display_message.return_value = "display_message"
321
345
 
322
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
346
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
323
347
 
324
348
  expected_display_message = "display_message"
325
349
  e = ExceptionWithDisplayMessage("display_message")
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
338
362
  )
339
363
  def test_get_error_display_message(exception, expected_display_message):
340
364
  stream = Mock()
341
- facade = StreamFacade(stream)
365
+ legacy_stream = Mock()
366
+ cursor = Mock(spec=Cursor)
367
+ facade = StreamFacade(stream, legacy_stream, cursor)
342
368
 
343
369
  display_message = facade.get_error_display_message(exception)
344
370
 
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
- partitions = [StreamPartition(stream, s, message_repository) for s in slices]
25
- stream.generate.return_value = iter(partitions)
26
-
27
24
  sync_mode = SyncMode.full_refresh
25
+ cursor_field = None
26
+ state = None
27
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
+ stream.generate.return_value = iter(partitions)
28
29
 
29
- partition_generator.generate_partitions(stream, sync_mode)
30
+ partition_generator.generate_partitions(stream)
30
31
 
31
32
  actual_partitions = []
32
33
  while partition := queue.get(False):