airbyte-cdk 0.52.6__py3-none-any.whl → 0.52.8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. airbyte_cdk/destinations/vector_db_based/config.py +1 -0
  2. airbyte_cdk/sources/abstract_source.py +12 -61
  3. airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -1
  4. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +1 -2
  5. airbyte_cdk/sources/message/repository.py +0 -6
  6. airbyte_cdk/sources/source.py +14 -13
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
  9. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
  10. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
  12. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
  13. airbyte_cdk/sources/streams/core.py +71 -1
  14. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +3 -3
  15. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +32 -30
  16. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +1 -1
  17. unit_tests/sources/file_based/file_types/test_unstructured_parser.py +5 -0
  18. unit_tests/sources/file_based/scenarios/csv_scenarios.py +1 -1
  19. unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +16 -0
  20. unit_tests/sources/message/test_repository.py +7 -20
  21. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
  22. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
  23. unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
  24. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
  25. unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
  26. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
  27. unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
  28. unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
  29. unit_tests/sources/streams/test_stream_read.py +3 -1
  30. unit_tests/sources/test_abstract_source.py +12 -9
  31. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
6
6
 
7
7
  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
8
8
  from airbyte_cdk.sources import AbstractSource
9
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
10
11
  from airbyte_cdk.sources.streams import Stream
11
12
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
13
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
12
14
  from airbyte_protocol.models import ConfiguredAirbyteStream
13
15
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
14
16
 
17
+ _NO_STATE = None
18
+
15
19
 
16
20
  class StreamFacadeSource(AbstractSource):
17
- def __init__(self, streams: List[Stream], max_workers: int):
21
+ def __init__(
22
+ self,
23
+ streams: List[Stream],
24
+ max_workers: int,
25
+ cursor_field: Optional[CursorField] = None,
26
+ cursor_boundaries: Optional[Tuple[str, str]] = None,
27
+ ):
18
28
  self._streams = streams
19
29
  self._max_workers = max_workers
30
+ self._message_repository = InMemoryMessageRepository()
31
+ self._cursor_field = cursor_field
32
+ self._cursor_boundaries = cursor_boundaries
20
33
 
21
34
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
22
35
  return True, None
23
36
 
24
37
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
25
- return [StreamFacade.create_from_stream(stream, self, stream.logger, self._max_workers) for stream in self._streams]
38
+ state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
39
+ return [
40
+ StreamFacade.create_from_stream(
41
+ stream,
42
+ self,
43
+ stream.logger,
44
+ self._max_workers,
45
+ _NO_STATE,
46
+ ConcurrentCursor(
47
+ stream.name,
48
+ stream.namespace,
49
+ _NO_STATE,
50
+ self.message_repository, # type: ignore # for this source specifically, we always return `InMemoryMessageRepository`
51
+ state_manager,
52
+ self._cursor_field,
53
+ self._cursor_boundaries,
54
+ )
55
+ if self._cursor_field
56
+ else NoopCursor(),
57
+ )
58
+ for stream in self._streams
59
+ ]
26
60
 
27
61
  @property
28
62
  def message_repository(self) -> Union[None, MessageRepository]:
29
- return InMemoryMessageRepository()
63
+ return self._message_repository
30
64
 
31
65
  def spec(self, logger: logging.Logger) -> ConnectorSpecification:
32
66
  return ConnectorSpecification(connectionSpecification={})
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
49
83
  self._source = None
50
84
  self._streams = []
51
85
  self._max_workers = 1
86
+ self._cursor_field = None
87
+ self._cursor_boundaries = None
52
88
 
53
89
  def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
54
90
  self._streams = streams
55
91
  return self
56
92
 
57
- def set_max_workers(self, max_workers: int):
93
+ def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
58
94
  self._max_workers = max_workers
59
95
  return self
60
96
 
97
+ def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
98
+ self._cursor_field = cursor_field
99
+ self._cursor_boundaries = cursor_boundaries
100
+ return self
101
+
61
102
  def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
62
- return StreamFacadeSource(self._streams, self._max_workers)
103
+ return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
@@ -1,26 +1,25 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
- from typing import Any, Iterable, List, Mapping, Optional, Union
4
+ from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
5
5
 
6
6
  from airbyte_cdk.models import SyncMode
7
7
  from airbyte_cdk.sources.streams import Stream
8
+ from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
8
9
  from airbyte_cdk.sources.streams.core import StreamData
9
- from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
10
+ from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
10
11
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
11
12
 
12
13
 
13
14
  class _MockStream(Stream):
14
15
  def __init__(
15
16
  self,
16
- slice_key,
17
- slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
17
+ slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
18
18
  name,
19
19
  json_schema,
20
20
  primary_key=None,
21
21
  ):
22
- self._slice_key = slice_key
23
- self._slice_values_to_records = slice_values_to_records_or_exception
22
+ self._slices_and_records_or_exception = slices_and_records_or_exception
24
23
  self._name = name
25
24
  self._json_schema = json_schema
26
25
  self._primary_key = primary_key
@@ -32,19 +31,12 @@ class _MockStream(Stream):
32
31
  stream_slice: Optional[Mapping[str, Any]] = None,
33
32
  stream_state: Optional[Mapping[str, Any]] = None,
34
33
  ) -> Iterable[StreamData]:
35
- for record_or_exception in self._get_record_or_exception_iterable(stream_slice):
36
- if isinstance(record_or_exception, Exception):
37
- raise record_or_exception
38
- else:
39
- yield record_or_exception
40
-
41
- def _get_record_or_exception_iterable(
42
- self, stream_slice: Optional[Mapping[str, Any]] = None
43
- ) -> Iterable[Union[Mapping[str, Any], Exception]]:
44
- if stream_slice is None:
45
- return self._slice_values_to_records[None]
46
- else:
47
- return self._slice_values_to_records[stream_slice[self._slice_key]]
34
+ for _slice, records_or_exception in self._slices_and_records_or_exception:
35
+ if stream_slice == _slice:
36
+ for item in records_or_exception:
37
+ if isinstance(item, Exception):
38
+ raise item
39
+ yield item
48
40
 
49
41
  @property
50
42
  def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -60,16 +52,16 @@ class _MockStream(Stream):
60
52
  def stream_slices(
61
53
  self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
62
54
  ) -> Iterable[Optional[Mapping[str, Any]]]:
63
- if self._slice_key:
64
- for slice_value in self._slice_values_to_records.keys():
65
- yield {self._slice_key: slice_value}
55
+ if self._slices_and_records_or_exception:
56
+ yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
66
57
  else:
67
58
  yield None
68
59
 
69
60
 
70
61
  _stream1 = _MockStream(
71
- None,
72
- {None: [{"id": "1"}, {"id": "2"}]},
62
+ [
63
+ (None, [{"id": "1"}, {"id": "2"}]),
64
+ ],
73
65
  "stream1",
74
66
  json_schema={
75
67
  "type": "object",
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
80
72
  )
81
73
 
82
74
  _stream_raising_exception = _MockStream(
83
- None,
84
- {None: [{"id": "1"}, ValueError("test exception")]},
75
+ [
76
+ (None, [{"id": "1"}, ValueError("test exception")]),
77
+ ],
85
78
  "stream1",
86
79
  json_schema={
87
80
  "type": "object",
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
92
85
  )
93
86
 
94
87
  _stream_with_primary_key = _MockStream(
95
- None,
96
- {None: [{"id": "1"}, {"id": "2"}]},
88
+ [
89
+ (None, [{"id": "1"}, {"id": "2"}]),
90
+ ],
97
91
  "stream1",
98
92
  json_schema={
99
93
  "type": "object",
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
105
99
  )
106
100
 
107
101
  _stream2 = _MockStream(
108
- None,
109
- {None: [{"id": "A"}, {"id": "B"}]},
102
+ [
103
+ (None, [{"id": "A"}, {"id": "B"}]),
104
+ ],
110
105
  "stream2",
111
106
  json_schema={
112
107
  "type": "object",
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
117
112
  )
118
113
 
119
114
  _stream_with_single_slice = _MockStream(
120
- "slice_key",
121
- {"s1": [{"id": "1"}, {"id": "2"}]},
115
+ [
116
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
117
+ ],
122
118
  "stream1",
123
119
  json_schema={
124
120
  "type": "object",
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
129
125
  )
130
126
 
131
127
  _stream_with_multiple_slices = _MockStream(
132
- "slice_key",
133
- {
134
- "s1": [{"id": "1"}, {"id": "2"}],
135
- "s2": [{"id": "3"}, {"id": "4"}],
136
- },
128
+ [
129
+ ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
130
+ ({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
131
+ ],
137
132
  "stream1",
138
133
  json_schema={
139
134
  "type": "object",
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
384
379
  )
385
380
  .build()
386
381
  )
382
+
383
+
384
+ test_incremental_stream_with_slice_boundaries = (
385
+ TestScenarioBuilder()
386
+ .set_name("test_incremental_stream_with_slice_boundaries")
387
+ .set_config({})
388
+ .set_source_builder(
389
+ StreamFacadeSourceBuilder()
390
+ .set_streams(
391
+ [
392
+ _MockStream(
393
+ [
394
+ ({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
395
+ ({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
396
+ ],
397
+ "stream1",
398
+ json_schema={
399
+ "type": "object",
400
+ "properties": {
401
+ "id": {"type": ["null", "string"]},
402
+ },
403
+ },
404
+ )
405
+ ]
406
+ )
407
+ .set_incremental(CursorField("cursor_field"), ("from", "to"))
408
+ )
409
+ .set_expected_records(
410
+ [
411
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
412
+ {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
413
+ {"stream1": {"slices": [{"start": 0, "end": 1}]}},
414
+ {"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
415
+ {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
416
+ {"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
417
+ ]
418
+ )
419
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
420
+ .set_incremental_scenario_config(
421
+ IncrementalScenarioConfig(
422
+ input_state=[],
423
+ )
424
+ )
425
+ .build()
426
+ )
427
+
428
+
429
+ _NO_SLICE_BOUNDARIES = None
430
+ test_incremental_stream_without_slice_boundaries = (
431
+ TestScenarioBuilder()
432
+ .set_name("test_incremental_stream_without_slice_boundaries")
433
+ .set_config({})
434
+ .set_source_builder(
435
+ StreamFacadeSourceBuilder()
436
+ .set_streams(
437
+ [
438
+ _MockStream(
439
+ [
440
+ (None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
441
+ ],
442
+ "stream1",
443
+ json_schema={
444
+ "type": "object",
445
+ "properties": {
446
+ "id": {"type": ["null", "string"]},
447
+ },
448
+ },
449
+ )
450
+ ]
451
+ )
452
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
453
+ )
454
+ .set_expected_records(
455
+ [
456
+ {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
457
+ {"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
458
+ {"stream1": {"slices": [{"start": 0, "end": 3}]}},
459
+ ]
460
+ )
461
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
462
+ .set_incremental_scenario_config(
463
+ IncrementalScenarioConfig(
464
+ input_state=[],
465
+ )
466
+ )
467
+ .build()
468
+ )
469
+
470
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
471
+ TestScenarioBuilder()
472
+ .set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
473
+ .set_config({})
474
+ .set_source_builder(
475
+ StreamFacadeSourceBuilder()
476
+ .set_streams(
477
+ [
478
+ _MockStream(
479
+ [
480
+ ({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
481
+ ({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
482
+ ],
483
+ "stream1",
484
+ json_schema={
485
+ "type": "object",
486
+ "properties": {
487
+ "id": {"type": ["null", "string"]},
488
+ },
489
+ },
490
+ )
491
+ ]
492
+ )
493
+ .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
494
+ )
495
+ .set_expected_read_error(ValueError, "test exception")
496
+ .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
497
+ .set_incremental_scenario_config(
498
+ IncrementalScenarioConfig(
499
+ input_state=[],
500
+ )
501
+ )
502
+ .build()
503
+ )
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
11
11
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
12
12
  from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
13
13
  from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
14
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
15
+ test_incremental_stream_with_slice_boundaries,
16
+ test_incremental_stream_without_slice_boundaries,
14
17
  test_stream_facade_multiple_streams,
15
18
  test_stream_facade_raises_exception,
16
19
  test_stream_facade_single_stream,
@@ -43,6 +46,9 @@ scenarios = [
43
46
  test_stream_facade_single_stream_with_multiple_slices,
44
47
  test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
45
48
  test_stream_facade_raises_exception,
49
+ test_incremental_stream_with_slice_boundaries,
50
+ test_incremental_stream_without_slice_boundaries,
51
+ test_incremental_stream_with_many_slices_but_without_slice_boundaries,
46
52
  ]
47
53
 
48
54
 
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
12
12
  from airbyte_cdk.sources.streams import Stream
13
13
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
15
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
16
17
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
17
18
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
19
  from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
20
+ from airbyte_cdk.sources.streams.core import StreamData
19
21
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
20
22
  from airbyte_protocol.models import ConfiguredAirbyteStream
21
23
  from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
22
24
 
23
25
 
26
+ class LegacyStream(Stream):
27
+ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
28
+ return None
29
+
30
+ def read_records(
31
+ self,
32
+ sync_mode: SyncMode,
33
+ cursor_field: Optional[List[str]] = None,
34
+ stream_slice: Optional[Mapping[str, Any]] = None,
35
+ stream_state: Optional[Mapping[str, Any]] = None,
36
+ ) -> Iterable[StreamData]:
37
+ yield from []
38
+
39
+
24
40
  class ConcurrentCdkSource(AbstractSource):
25
41
  def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
26
42
  self._streams = streams
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
31
47
  return True, None
32
48
 
33
49
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
34
- return [StreamFacade(s) for s in self._streams]
50
+ return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
35
51
 
36
52
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
37
53
  return ConnectorSpecification(connectionSpecification={})
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
40
56
  return ConfiguredAirbyteCatalog(
41
57
  streams=[
42
58
  ConfiguredAirbyteStream(
43
- stream=StreamFacade(s).as_airbyte_stream(),
59
+ stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
44
60
  sync_mode=SyncMode.full_refresh,
45
61
  destination_sync_mode=DestinationSyncMode.overwrite,
46
62
  )
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
57
73
  def __init__(self, partitions: List[Partition]):
58
74
  self._partitions = partitions
59
75
 
60
- def generate(self, sync_mode: SyncMode) -> Iterable[Partition]:
76
+ def generate(self) -> Iterable[Partition]:
61
77
  yield from self._partitions
62
78
 
63
79
 
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
17
17
  StreamPartitionGenerator,
18
18
  )
19
19
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
20
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
20
21
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
21
22
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
23
+ from airbyte_cdk.sources.streams.core import Stream
22
24
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
23
25
 
26
+ _ANY_SYNC_MODE = SyncMode.full_refresh
27
+ _ANY_STATE = {"state_key": "state_value"}
28
+ _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
29
+
24
30
 
25
31
  @pytest.mark.parametrize(
26
32
  "stream_availability, expected_available, expected_message",
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
71
77
  stream_slices = [{"slice": 1}, {"slice": 2}]
72
78
  stream.stream_slices.return_value = stream_slices
73
79
 
74
- partition_generator = StreamPartitionGenerator(stream, message_repository)
80
+ partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
75
81
 
76
- partitions = list(partition_generator.generate(sync_mode))
82
+ partitions = list(partition_generator.generate())
77
83
  slices = [partition.to_slice() for partition in partitions]
78
84
  assert slices == stream_slices
85
+ stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
79
86
 
80
87
 
81
88
  @pytest.mark.parametrize(
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
95
102
  stream.transformer = transformer
96
103
  message_repository = InMemoryMessageRepository()
97
104
  _slice = None
98
- partition = StreamPartition(stream, _slice, message_repository)
105
+ sync_mode = SyncMode.full_refresh
106
+ cursor_field = None
107
+ state = None
108
+ partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
99
109
 
100
110
  a_log_message = AirbyteMessage(
101
111
  type=MessageType.LOG,
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
128
138
 
129
139
  message_repository = InMemoryMessageRepository()
130
140
  _slice = None
131
- partition = StreamPartition(stream, _slice, message_repository)
141
+
142
+ partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
132
143
 
133
144
  stream.read_records.side_effect = Exception()
134
145
 
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
148
159
  def test_stream_partition_hash(_slice, expected_hash):
149
160
  stream = Mock()
150
161
  stream.name = "stream"
151
- partition = StreamPartition(stream, _slice, Mock())
162
+ partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
152
163
 
153
164
  _hash = partition.__hash__()
154
165
  assert _hash == expected_hash
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
163
174
  json_schema={"type": "object"},
164
175
  supported_sync_modes=[SyncMode.full_refresh],
165
176
  )
166
- self._facade = StreamFacade(self._abstract_stream)
177
+ self._legacy_stream = Mock(spec=Stream)
178
+ self._cursor = Mock(spec=Cursor)
179
+ self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
167
180
  self._logger = Mock()
168
181
  self._source = Mock()
169
182
  self._max_workers = 10
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
191
204
  assert self._facade.get_json_schema() == json_schema
192
205
  self._abstract_stream.get_json_schema.assert_called_once_with()
193
206
 
194
- def test_supports_incremental_is_false(self):
195
- assert self._facade.supports_incremental is False
207
+ def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
208
+ assert (
209
+ StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
210
+ == self._legacy_stream.supports_incremental
211
+ )
212
+
213
+ def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
214
+ assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
196
215
 
197
216
  def test_check_availability_is_delegated_to_wrapped_stream(self):
198
217
  availability = StreamAvailable()
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
219
238
  assert actual_stream_data == expected_stream_data
220
239
 
221
240
  def test_read_records_incremental(self):
222
- with self.assertRaises(NotImplementedError):
223
- list(self._facade.read_records(SyncMode.incremental, None, None, None))
241
+ expected_stream_data = [{"data": 1}, {"data": 2}]
242
+ records = [Record(data) for data in expected_stream_data]
243
+ self._abstract_stream.read.return_value = records
244
+
245
+ actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
246
+
247
+ assert actual_stream_data == expected_stream_data
224
248
 
225
249
  def test_create_from_stream_stream(self):
226
250
  stream = Mock()
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
228
252
  stream.primary_key = "id"
229
253
  stream.cursor_field = "cursor"
230
254
 
231
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
255
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
232
256
 
233
257
  assert facade.name == "stream"
234
258
  assert facade.cursor_field == "cursor"
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
240
264
  stream.primary_key = None
241
265
  stream.cursor_field = []
242
266
 
243
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
267
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
244
268
  facade._abstract_stream._primary_key is None
245
269
 
246
270
  def test_create_from_stream_with_composite_primary_key(self):
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
249
273
  stream.primary_key = ["id", "name"]
250
274
  stream.cursor_field = []
251
275
 
252
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
276
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
253
277
  facade._abstract_stream._primary_key == ["id", "name"]
254
278
 
255
279
  def test_create_from_stream_with_empty_list_cursor(self):
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
257
281
  stream.primary_key = "id"
258
282
  stream.cursor_field = []
259
283
 
260
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
284
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
261
285
 
262
286
  assert facade.cursor_field == []
263
287
 
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
267
291
  stream.primary_key = [["field", "id"]]
268
292
 
269
293
  with self.assertRaises(ValueError):
270
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
294
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
271
295
 
272
296
  def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
273
297
  stream = Mock()
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
275
299
  stream.primary_key = 123
276
300
 
277
301
  with self.assertRaises(ValueError):
278
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
302
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
279
303
 
280
304
  def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
281
305
  stream = Mock()
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
284
308
  stream.cursor_field = ["field", "cursor"]
285
309
 
286
310
  with self.assertRaises(ValueError):
287
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
311
+ StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
288
312
 
289
313
  def test_create_from_stream_with_cursor_field_as_list(self):
290
314
  stream = Mock()
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
292
316
  stream.primary_key = "id"
293
317
  stream.cursor_field = ["cursor"]
294
318
 
295
- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
319
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
296
320
  assert facade.cursor_field == "cursor"
297
321
 
298
322
  def test_create_from_stream_none_message_repository(self):
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
302
326
  self._source.message_repository = None
303
327
 
304
328
  with self.assertRaises(ValueError):
305
- StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
329
+ StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
306
330
 
307
331
  def test_get_error_display_message_no_display_message(self):
308
332
  self._stream.get_error_display_message.return_value = "display_message"
309
333
 
310
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
334
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
311
335
 
312
336
  expected_display_message = None
313
337
  e = Exception()
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
319
343
  def test_get_error_display_message_with_display_message(self):
320
344
  self._stream.get_error_display_message.return_value = "display_message"
321
345
 
322
- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
346
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
323
347
 
324
348
  expected_display_message = "display_message"
325
349
  e = ExceptionWithDisplayMessage("display_message")
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
338
362
  )
339
363
  def test_get_error_display_message(exception, expected_display_message):
340
364
  stream = Mock()
341
- facade = StreamFacade(stream)
365
+ legacy_stream = Mock()
366
+ cursor = Mock(spec=Cursor)
367
+ facade = StreamFacade(stream, legacy_stream, cursor)
342
368
 
343
369
  display_message = facade.get_error_display_message(exception)
344
370
 
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
- partitions = [StreamPartition(stream, s, message_repository) for s in slices]
25
- stream.generate.return_value = iter(partitions)
26
-
27
24
  sync_mode = SyncMode.full_refresh
25
+ cursor_field = None
26
+ state = None
27
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
+ stream.generate.return_value = iter(partitions)
28
29
 
29
- partition_generator.generate_partitions(stream, sync_mode)
30
+ partition_generator.generate_partitions(stream)
30
31
 
31
32
  actual_partitions = []
32
33
  while partition := queue.get(False):