airbyte-cdk 0.52.6__py3-none-any.whl → 0.52.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/destinations/vector_db_based/config.py +1 -0
- airbyte_cdk/sources/abstract_source.py +12 -61
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -1
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +1 -2
- airbyte_cdk/sources/message/repository.py +0 -6
- airbyte_cdk/sources/source.py +14 -13
- airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
- airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
- airbyte_cdk/sources/streams/core.py +71 -1
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +3 -3
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +32 -30
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +1 -1
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +5 -0
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +1 -1
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +16 -0
- unit_tests/sources/message/test_repository.py +7 -20
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
- unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
- unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
- unit_tests/sources/streams/test_stream_read.py +3 -1
- unit_tests/sources/test_abstract_source.py +12 -9
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
|
|
6
6
|
|
7
7
|
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
|
8
8
|
from airbyte_cdk.sources import AbstractSource
|
9
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
9
10
|
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
10
11
|
from airbyte_cdk.sources.streams import Stream
|
11
12
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
13
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
|
12
14
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
13
15
|
from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
|
14
16
|
|
17
|
+
_NO_STATE = None
|
18
|
+
|
15
19
|
|
16
20
|
class StreamFacadeSource(AbstractSource):
|
17
|
-
def __init__(
|
21
|
+
def __init__(
|
22
|
+
self,
|
23
|
+
streams: List[Stream],
|
24
|
+
max_workers: int,
|
25
|
+
cursor_field: Optional[CursorField] = None,
|
26
|
+
cursor_boundaries: Optional[Tuple[str, str]] = None,
|
27
|
+
):
|
18
28
|
self._streams = streams
|
19
29
|
self._max_workers = max_workers
|
30
|
+
self._message_repository = InMemoryMessageRepository()
|
31
|
+
self._cursor_field = cursor_field
|
32
|
+
self._cursor_boundaries = cursor_boundaries
|
20
33
|
|
21
34
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
22
35
|
return True, None
|
23
36
|
|
24
37
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
25
|
-
|
38
|
+
state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
|
39
|
+
return [
|
40
|
+
StreamFacade.create_from_stream(
|
41
|
+
stream,
|
42
|
+
self,
|
43
|
+
stream.logger,
|
44
|
+
self._max_workers,
|
45
|
+
_NO_STATE,
|
46
|
+
ConcurrentCursor(
|
47
|
+
stream.name,
|
48
|
+
stream.namespace,
|
49
|
+
_NO_STATE,
|
50
|
+
self.message_repository, # type: ignore # for this source specifically, we always return `InMemoryMessageRepository`
|
51
|
+
state_manager,
|
52
|
+
self._cursor_field,
|
53
|
+
self._cursor_boundaries,
|
54
|
+
)
|
55
|
+
if self._cursor_field
|
56
|
+
else NoopCursor(),
|
57
|
+
)
|
58
|
+
for stream in self._streams
|
59
|
+
]
|
26
60
|
|
27
61
|
@property
|
28
62
|
def message_repository(self) -> Union[None, MessageRepository]:
|
29
|
-
return
|
63
|
+
return self._message_repository
|
30
64
|
|
31
65
|
def spec(self, logger: logging.Logger) -> ConnectorSpecification:
|
32
66
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
|
|
49
83
|
self._source = None
|
50
84
|
self._streams = []
|
51
85
|
self._max_workers = 1
|
86
|
+
self._cursor_field = None
|
87
|
+
self._cursor_boundaries = None
|
52
88
|
|
53
89
|
def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
|
54
90
|
self._streams = streams
|
55
91
|
return self
|
56
92
|
|
57
|
-
def set_max_workers(self, max_workers: int):
|
93
|
+
def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
|
58
94
|
self._max_workers = max_workers
|
59
95
|
return self
|
60
96
|
|
97
|
+
def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
|
98
|
+
self._cursor_field = cursor_field
|
99
|
+
self._cursor_boundaries = cursor_boundaries
|
100
|
+
return self
|
101
|
+
|
61
102
|
def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
|
62
|
-
return StreamFacadeSource(self._streams, self._max_workers)
|
103
|
+
return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
|
@@ -1,26 +1,25 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
from typing import Any, Iterable, List, Mapping, Optional, Union
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
|
5
5
|
|
6
6
|
from airbyte_cdk.models import SyncMode
|
7
7
|
from airbyte_cdk.sources.streams import Stream
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
8
9
|
from airbyte_cdk.sources.streams.core import StreamData
|
9
|
-
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
|
10
|
+
from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
|
10
11
|
from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
|
11
12
|
|
12
13
|
|
13
14
|
class _MockStream(Stream):
|
14
15
|
def __init__(
|
15
16
|
self,
|
16
|
-
|
17
|
-
slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
|
17
|
+
slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
|
18
18
|
name,
|
19
19
|
json_schema,
|
20
20
|
primary_key=None,
|
21
21
|
):
|
22
|
-
self.
|
23
|
-
self._slice_values_to_records = slice_values_to_records_or_exception
|
22
|
+
self._slices_and_records_or_exception = slices_and_records_or_exception
|
24
23
|
self._name = name
|
25
24
|
self._json_schema = json_schema
|
26
25
|
self._primary_key = primary_key
|
@@ -32,19 +31,12 @@ class _MockStream(Stream):
|
|
32
31
|
stream_slice: Optional[Mapping[str, Any]] = None,
|
33
32
|
stream_state: Optional[Mapping[str, Any]] = None,
|
34
33
|
) -> Iterable[StreamData]:
|
35
|
-
for
|
36
|
-
if
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
def _get_record_or_exception_iterable(
|
42
|
-
self, stream_slice: Optional[Mapping[str, Any]] = None
|
43
|
-
) -> Iterable[Union[Mapping[str, Any], Exception]]:
|
44
|
-
if stream_slice is None:
|
45
|
-
return self._slice_values_to_records[None]
|
46
|
-
else:
|
47
|
-
return self._slice_values_to_records[stream_slice[self._slice_key]]
|
34
|
+
for _slice, records_or_exception in self._slices_and_records_or_exception:
|
35
|
+
if stream_slice == _slice:
|
36
|
+
for item in records_or_exception:
|
37
|
+
if isinstance(item, Exception):
|
38
|
+
raise item
|
39
|
+
yield item
|
48
40
|
|
49
41
|
@property
|
50
42
|
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
@@ -60,16 +52,16 @@ class _MockStream(Stream):
|
|
60
52
|
def stream_slices(
|
61
53
|
self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
|
62
54
|
) -> Iterable[Optional[Mapping[str, Any]]]:
|
63
|
-
if self.
|
64
|
-
for
|
65
|
-
yield {self._slice_key: slice_value}
|
55
|
+
if self._slices_and_records_or_exception:
|
56
|
+
yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
|
66
57
|
else:
|
67
58
|
yield None
|
68
59
|
|
69
60
|
|
70
61
|
_stream1 = _MockStream(
|
71
|
-
|
72
|
-
|
62
|
+
[
|
63
|
+
(None, [{"id": "1"}, {"id": "2"}]),
|
64
|
+
],
|
73
65
|
"stream1",
|
74
66
|
json_schema={
|
75
67
|
"type": "object",
|
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
|
|
80
72
|
)
|
81
73
|
|
82
74
|
_stream_raising_exception = _MockStream(
|
83
|
-
|
84
|
-
|
75
|
+
[
|
76
|
+
(None, [{"id": "1"}, ValueError("test exception")]),
|
77
|
+
],
|
85
78
|
"stream1",
|
86
79
|
json_schema={
|
87
80
|
"type": "object",
|
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
|
|
92
85
|
)
|
93
86
|
|
94
87
|
_stream_with_primary_key = _MockStream(
|
95
|
-
|
96
|
-
|
88
|
+
[
|
89
|
+
(None, [{"id": "1"}, {"id": "2"}]),
|
90
|
+
],
|
97
91
|
"stream1",
|
98
92
|
json_schema={
|
99
93
|
"type": "object",
|
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
|
|
105
99
|
)
|
106
100
|
|
107
101
|
_stream2 = _MockStream(
|
108
|
-
|
109
|
-
|
102
|
+
[
|
103
|
+
(None, [{"id": "A"}, {"id": "B"}]),
|
104
|
+
],
|
110
105
|
"stream2",
|
111
106
|
json_schema={
|
112
107
|
"type": "object",
|
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
|
|
117
112
|
)
|
118
113
|
|
119
114
|
_stream_with_single_slice = _MockStream(
|
120
|
-
|
121
|
-
|
115
|
+
[
|
116
|
+
({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
|
117
|
+
],
|
122
118
|
"stream1",
|
123
119
|
json_schema={
|
124
120
|
"type": "object",
|
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
|
|
129
125
|
)
|
130
126
|
|
131
127
|
_stream_with_multiple_slices = _MockStream(
|
132
|
-
|
133
|
-
|
134
|
-
"
|
135
|
-
|
136
|
-
},
|
128
|
+
[
|
129
|
+
({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
|
130
|
+
({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
|
131
|
+
],
|
137
132
|
"stream1",
|
138
133
|
json_schema={
|
139
134
|
"type": "object",
|
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
|
|
384
379
|
)
|
385
380
|
.build()
|
386
381
|
)
|
382
|
+
|
383
|
+
|
384
|
+
test_incremental_stream_with_slice_boundaries = (
|
385
|
+
TestScenarioBuilder()
|
386
|
+
.set_name("test_incremental_stream_with_slice_boundaries")
|
387
|
+
.set_config({})
|
388
|
+
.set_source_builder(
|
389
|
+
StreamFacadeSourceBuilder()
|
390
|
+
.set_streams(
|
391
|
+
[
|
392
|
+
_MockStream(
|
393
|
+
[
|
394
|
+
({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
|
395
|
+
({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
|
396
|
+
],
|
397
|
+
"stream1",
|
398
|
+
json_schema={
|
399
|
+
"type": "object",
|
400
|
+
"properties": {
|
401
|
+
"id": {"type": ["null", "string"]},
|
402
|
+
},
|
403
|
+
},
|
404
|
+
)
|
405
|
+
]
|
406
|
+
)
|
407
|
+
.set_incremental(CursorField("cursor_field"), ("from", "to"))
|
408
|
+
)
|
409
|
+
.set_expected_records(
|
410
|
+
[
|
411
|
+
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
412
|
+
{"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
|
413
|
+
{"stream1": {"slices": [{"start": 0, "end": 1}]}},
|
414
|
+
{"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
|
415
|
+
{"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
|
416
|
+
{"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
|
417
|
+
]
|
418
|
+
)
|
419
|
+
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
420
|
+
.set_incremental_scenario_config(
|
421
|
+
IncrementalScenarioConfig(
|
422
|
+
input_state=[],
|
423
|
+
)
|
424
|
+
)
|
425
|
+
.build()
|
426
|
+
)
|
427
|
+
|
428
|
+
|
429
|
+
_NO_SLICE_BOUNDARIES = None
|
430
|
+
test_incremental_stream_without_slice_boundaries = (
|
431
|
+
TestScenarioBuilder()
|
432
|
+
.set_name("test_incremental_stream_without_slice_boundaries")
|
433
|
+
.set_config({})
|
434
|
+
.set_source_builder(
|
435
|
+
StreamFacadeSourceBuilder()
|
436
|
+
.set_streams(
|
437
|
+
[
|
438
|
+
_MockStream(
|
439
|
+
[
|
440
|
+
(None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
|
441
|
+
],
|
442
|
+
"stream1",
|
443
|
+
json_schema={
|
444
|
+
"type": "object",
|
445
|
+
"properties": {
|
446
|
+
"id": {"type": ["null", "string"]},
|
447
|
+
},
|
448
|
+
},
|
449
|
+
)
|
450
|
+
]
|
451
|
+
)
|
452
|
+
.set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
|
453
|
+
)
|
454
|
+
.set_expected_records(
|
455
|
+
[
|
456
|
+
{"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
|
457
|
+
{"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
|
458
|
+
{"stream1": {"slices": [{"start": 0, "end": 3}]}},
|
459
|
+
]
|
460
|
+
)
|
461
|
+
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
462
|
+
.set_incremental_scenario_config(
|
463
|
+
IncrementalScenarioConfig(
|
464
|
+
input_state=[],
|
465
|
+
)
|
466
|
+
)
|
467
|
+
.build()
|
468
|
+
)
|
469
|
+
|
470
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
|
471
|
+
TestScenarioBuilder()
|
472
|
+
.set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
|
473
|
+
.set_config({})
|
474
|
+
.set_source_builder(
|
475
|
+
StreamFacadeSourceBuilder()
|
476
|
+
.set_streams(
|
477
|
+
[
|
478
|
+
_MockStream(
|
479
|
+
[
|
480
|
+
({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
|
481
|
+
({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
|
482
|
+
],
|
483
|
+
"stream1",
|
484
|
+
json_schema={
|
485
|
+
"type": "object",
|
486
|
+
"properties": {
|
487
|
+
"id": {"type": ["null", "string"]},
|
488
|
+
},
|
489
|
+
},
|
490
|
+
)
|
491
|
+
]
|
492
|
+
)
|
493
|
+
.set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
|
494
|
+
)
|
495
|
+
.set_expected_read_error(ValueError, "test exception")
|
496
|
+
.set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
|
497
|
+
.set_incremental_scenario_config(
|
498
|
+
IncrementalScenarioConfig(
|
499
|
+
input_state=[],
|
500
|
+
)
|
501
|
+
)
|
502
|
+
.build()
|
503
|
+
)
|
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
|
|
11
11
|
from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
|
12
12
|
from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
|
13
13
|
from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
|
14
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
15
|
+
test_incremental_stream_with_slice_boundaries,
|
16
|
+
test_incremental_stream_without_slice_boundaries,
|
14
17
|
test_stream_facade_multiple_streams,
|
15
18
|
test_stream_facade_raises_exception,
|
16
19
|
test_stream_facade_single_stream,
|
@@ -43,6 +46,9 @@ scenarios = [
|
|
43
46
|
test_stream_facade_single_stream_with_multiple_slices,
|
44
47
|
test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
|
45
48
|
test_stream_facade_raises_exception,
|
49
|
+
test_incremental_stream_with_slice_boundaries,
|
50
|
+
test_incremental_stream_without_slice_boundaries,
|
51
|
+
test_incremental_stream_with_many_slices_but_without_slice_boundaries,
|
46
52
|
]
|
47
53
|
|
48
54
|
|
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
|
|
12
12
|
from airbyte_cdk.sources.streams import Stream
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
15
16
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
16
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
17
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
18
19
|
from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
|
20
|
+
from airbyte_cdk.sources.streams.core import StreamData
|
19
21
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
20
22
|
from airbyte_protocol.models import ConfiguredAirbyteStream
|
21
23
|
from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
|
22
24
|
|
23
25
|
|
26
|
+
class LegacyStream(Stream):
|
27
|
+
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
28
|
+
return None
|
29
|
+
|
30
|
+
def read_records(
|
31
|
+
self,
|
32
|
+
sync_mode: SyncMode,
|
33
|
+
cursor_field: Optional[List[str]] = None,
|
34
|
+
stream_slice: Optional[Mapping[str, Any]] = None,
|
35
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
36
|
+
) -> Iterable[StreamData]:
|
37
|
+
yield from []
|
38
|
+
|
39
|
+
|
24
40
|
class ConcurrentCdkSource(AbstractSource):
|
25
41
|
def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
|
26
42
|
self._streams = streams
|
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
31
47
|
return True, None
|
32
48
|
|
33
49
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
34
|
-
return [StreamFacade(s) for s in self._streams]
|
50
|
+
return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
|
35
51
|
|
36
52
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
37
53
|
return ConnectorSpecification(connectionSpecification={})
|
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
|
|
40
56
|
return ConfiguredAirbyteCatalog(
|
41
57
|
streams=[
|
42
58
|
ConfiguredAirbyteStream(
|
43
|
-
stream=StreamFacade(s).as_airbyte_stream(),
|
59
|
+
stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
|
44
60
|
sync_mode=SyncMode.full_refresh,
|
45
61
|
destination_sync_mode=DestinationSyncMode.overwrite,
|
46
62
|
)
|
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
|
|
57
73
|
def __init__(self, partitions: List[Partition]):
|
58
74
|
self._partitions = partitions
|
59
75
|
|
60
|
-
def generate(self
|
76
|
+
def generate(self) -> Iterable[Partition]:
|
61
77
|
yield from self._partitions
|
62
78
|
|
63
79
|
|
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
|
|
17
17
|
StreamPartitionGenerator,
|
18
18
|
)
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
|
20
21
|
from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
|
21
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
|
+
from airbyte_cdk.sources.streams.core import Stream
|
22
24
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
23
25
|
|
26
|
+
_ANY_SYNC_MODE = SyncMode.full_refresh
|
27
|
+
_ANY_STATE = {"state_key": "state_value"}
|
28
|
+
_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
|
29
|
+
|
24
30
|
|
25
31
|
@pytest.mark.parametrize(
|
26
32
|
"stream_availability, expected_available, expected_message",
|
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
|
|
71
77
|
stream_slices = [{"slice": 1}, {"slice": 2}]
|
72
78
|
stream.stream_slices.return_value = stream_slices
|
73
79
|
|
74
|
-
partition_generator = StreamPartitionGenerator(stream, message_repository)
|
80
|
+
partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
75
81
|
|
76
|
-
partitions = list(partition_generator.generate(
|
82
|
+
partitions = list(partition_generator.generate())
|
77
83
|
slices = [partition.to_slice() for partition in partitions]
|
78
84
|
assert slices == stream_slices
|
85
|
+
stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
|
79
86
|
|
80
87
|
|
81
88
|
@pytest.mark.parametrize(
|
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
|
|
95
102
|
stream.transformer = transformer
|
96
103
|
message_repository = InMemoryMessageRepository()
|
97
104
|
_slice = None
|
98
|
-
|
105
|
+
sync_mode = SyncMode.full_refresh
|
106
|
+
cursor_field = None
|
107
|
+
state = None
|
108
|
+
partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
|
99
109
|
|
100
110
|
a_log_message = AirbyteMessage(
|
101
111
|
type=MessageType.LOG,
|
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
128
138
|
|
129
139
|
message_repository = InMemoryMessageRepository()
|
130
140
|
_slice = None
|
131
|
-
|
141
|
+
|
142
|
+
partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
132
143
|
|
133
144
|
stream.read_records.side_effect = Exception()
|
134
145
|
|
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
|
|
148
159
|
def test_stream_partition_hash(_slice, expected_hash):
|
149
160
|
stream = Mock()
|
150
161
|
stream.name = "stream"
|
151
|
-
partition = StreamPartition(stream, _slice, Mock())
|
162
|
+
partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
|
152
163
|
|
153
164
|
_hash = partition.__hash__()
|
154
165
|
assert _hash == expected_hash
|
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
163
174
|
json_schema={"type": "object"},
|
164
175
|
supported_sync_modes=[SyncMode.full_refresh],
|
165
176
|
)
|
166
|
-
self.
|
177
|
+
self._legacy_stream = Mock(spec=Stream)
|
178
|
+
self._cursor = Mock(spec=Cursor)
|
179
|
+
self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
|
167
180
|
self._logger = Mock()
|
168
181
|
self._source = Mock()
|
169
182
|
self._max_workers = 10
|
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
|
|
191
204
|
assert self._facade.get_json_schema() == json_schema
|
192
205
|
self._abstract_stream.get_json_schema.assert_called_once_with()
|
193
206
|
|
194
|
-
def
|
195
|
-
assert
|
207
|
+
def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
|
208
|
+
assert (
|
209
|
+
StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
|
210
|
+
== self._legacy_stream.supports_incremental
|
211
|
+
)
|
212
|
+
|
213
|
+
def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
|
214
|
+
assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
|
196
215
|
|
197
216
|
def test_check_availability_is_delegated_to_wrapped_stream(self):
|
198
217
|
availability = StreamAvailable()
|
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
|
|
219
238
|
assert actual_stream_data == expected_stream_data
|
220
239
|
|
221
240
|
def test_read_records_incremental(self):
|
222
|
-
|
223
|
-
|
241
|
+
expected_stream_data = [{"data": 1}, {"data": 2}]
|
242
|
+
records = [Record(data) for data in expected_stream_data]
|
243
|
+
self._abstract_stream.read.return_value = records
|
244
|
+
|
245
|
+
actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
|
246
|
+
|
247
|
+
assert actual_stream_data == expected_stream_data
|
224
248
|
|
225
249
|
def test_create_from_stream_stream(self):
|
226
250
|
stream = Mock()
|
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
228
252
|
stream.primary_key = "id"
|
229
253
|
stream.cursor_field = "cursor"
|
230
254
|
|
231
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
255
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
232
256
|
|
233
257
|
assert facade.name == "stream"
|
234
258
|
assert facade.cursor_field == "cursor"
|
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
240
264
|
stream.primary_key = None
|
241
265
|
stream.cursor_field = []
|
242
266
|
|
243
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
267
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
244
268
|
facade._abstract_stream._primary_key is None
|
245
269
|
|
246
270
|
def test_create_from_stream_with_composite_primary_key(self):
|
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
249
273
|
stream.primary_key = ["id", "name"]
|
250
274
|
stream.cursor_field = []
|
251
275
|
|
252
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
276
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
253
277
|
facade._abstract_stream._primary_key == ["id", "name"]
|
254
278
|
|
255
279
|
def test_create_from_stream_with_empty_list_cursor(self):
|
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
257
281
|
stream.primary_key = "id"
|
258
282
|
stream.cursor_field = []
|
259
283
|
|
260
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
284
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
261
285
|
|
262
286
|
assert facade.cursor_field == []
|
263
287
|
|
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
267
291
|
stream.primary_key = [["field", "id"]]
|
268
292
|
|
269
293
|
with self.assertRaises(ValueError):
|
270
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
294
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
271
295
|
|
272
296
|
def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
|
273
297
|
stream = Mock()
|
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
275
299
|
stream.primary_key = 123
|
276
300
|
|
277
301
|
with self.assertRaises(ValueError):
|
278
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
302
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
279
303
|
|
280
304
|
def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
|
281
305
|
stream = Mock()
|
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
284
308
|
stream.cursor_field = ["field", "cursor"]
|
285
309
|
|
286
310
|
with self.assertRaises(ValueError):
|
287
|
-
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
311
|
+
StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
288
312
|
|
289
313
|
def test_create_from_stream_with_cursor_field_as_list(self):
|
290
314
|
stream = Mock()
|
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
292
316
|
stream.primary_key = "id"
|
293
317
|
stream.cursor_field = ["cursor"]
|
294
318
|
|
295
|
-
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
|
319
|
+
facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
296
320
|
assert facade.cursor_field == "cursor"
|
297
321
|
|
298
322
|
def test_create_from_stream_none_message_repository(self):
|
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
|
|
302
326
|
self._source.message_repository = None
|
303
327
|
|
304
328
|
with self.assertRaises(ValueError):
|
305
|
-
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
329
|
+
StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
|
306
330
|
|
307
331
|
def test_get_error_display_message_no_display_message(self):
|
308
332
|
self._stream.get_error_display_message.return_value = "display_message"
|
309
333
|
|
310
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
334
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
311
335
|
|
312
336
|
expected_display_message = None
|
313
337
|
e = Exception()
|
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
|
|
319
343
|
def test_get_error_display_message_with_display_message(self):
|
320
344
|
self._stream.get_error_display_message.return_value = "display_message"
|
321
345
|
|
322
|
-
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
|
346
|
+
facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
|
323
347
|
|
324
348
|
expected_display_message = "display_message"
|
325
349
|
e = ExceptionWithDisplayMessage("display_message")
|
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
|
|
338
362
|
)
|
339
363
|
def test_get_error_display_message(exception, expected_display_message):
|
340
364
|
stream = Mock()
|
341
|
-
|
365
|
+
legacy_stream = Mock()
|
366
|
+
cursor = Mock(spec=Cursor)
|
367
|
+
facade = StreamFacade(stream, legacy_stream, cursor)
|
342
368
|
|
343
369
|
display_message = facade.get_error_display_message(exception)
|
344
370
|
|
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
|
|
21
21
|
|
22
22
|
stream = Mock()
|
23
23
|
message_repository = Mock()
|
24
|
-
partitions = [StreamPartition(stream, s, message_repository) for s in slices]
|
25
|
-
stream.generate.return_value = iter(partitions)
|
26
|
-
|
27
24
|
sync_mode = SyncMode.full_refresh
|
25
|
+
cursor_field = None
|
26
|
+
state = None
|
27
|
+
partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
|
28
|
+
stream.generate.return_value = iter(partitions)
|
28
29
|
|
29
|
-
partition_generator.generate_partitions(stream
|
30
|
+
partition_generator.generate_partitions(stream)
|
30
31
|
|
31
32
|
actual_partitions = []
|
32
33
|
while partition := queue.get(False):
|