airbyte-cdk 0.52.6__py3-none-any.whl → 0.52.8__py3-none-any.whl
This diff compares two package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- airbyte_cdk/destinations/vector_db_based/config.py +1 -0
- airbyte_cdk/sources/abstract_source.py +12 -61
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -1
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +1 -2
- airbyte_cdk/sources/message/repository.py +0 -6
- airbyte_cdk/sources/source.py +14 -13
- airbyte_cdk/sources/streams/concurrent/adapters.py +94 -21
- airbyte_cdk/sources/streams/concurrent/cursor.py +148 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +2 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +3 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +1 -3
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +7 -3
- airbyte_cdk/sources/streams/core.py +71 -1
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/METADATA +3 -3
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/RECORD +32 -30
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/WHEEL +1 -1
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +5 -0
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +1 -1
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +16 -0
- unit_tests/sources/message/test_repository.py +7 -20
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +46 -5
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +154 -37
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +6 -0
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +19 -3
- unit_tests/sources/streams/concurrent/test_adapters.py +48 -22
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +5 -4
- unit_tests/sources/streams/concurrent/test_cursor.py +130 -0
- unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +14 -10
- unit_tests/sources/streams/test_stream_read.py +3 -1
- unit_tests/sources/test_abstract_source.py +12 -9
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.52.6.dist-info → airbyte_cdk-0.52.8.dist-info}/top_level.txt +0 -0
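The common thread in the test diffs below is that the concurrent `StreamFacade` is now cursor-aware: it is built from a legacy `Stream` plus a `Cursor`, and `StreamFacade.create_from_stream` additionally receives the stream state and the cursor. The sketch below is not taken from the package itself; it is a minimal illustration assembled only from the call signatures visible in the test changes that follow, with `stream`, `source`, the `"updated_at"` cursor field, and the `("from", "to")` boundary keys as placeholders.

```python
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor


def wrap_stream(stream, source, max_workers, incremental):
    state = None  # no prior state in this sketch
    if incremental:
        state_manager = ConnectorStateManager(stream_instance_map={stream.name: stream}, state=state)
        cursor = ConcurrentCursor(
            stream.name,
            stream.namespace,
            state,
            source.message_repository,
            state_manager,
            CursorField("updated_at"),  # placeholder: the record field that drives the cursor
            ("from", "to"),  # slice boundary keys, or None when slices carry no boundaries
        )
    else:
        cursor = NoopCursor()  # full refresh: no cursor state is tracked
    return StreamFacade.create_from_stream(stream, source, stream.logger, max_workers, state, cursor)
```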
unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py
CHANGED
@@ -6,27 +6,61 @@ from typing import Any, List, Mapping, Optional, Tuple, Union
 
 from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
 from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField, NoopCursor
 from airbyte_protocol.models import ConfiguredAirbyteStream
 from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
 
+_NO_STATE = None
+
 
 class StreamFacadeSource(AbstractSource):
-    def __init__(
+    def __init__(
+        self,
+        streams: List[Stream],
+        max_workers: int,
+        cursor_field: Optional[CursorField] = None,
+        cursor_boundaries: Optional[Tuple[str, str]] = None,
+    ):
         self._streams = streams
         self._max_workers = max_workers
+        self._message_repository = InMemoryMessageRepository()
+        self._cursor_field = cursor_field
+        self._cursor_boundaries = cursor_boundaries
 
     def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
         return True, None
 
     def streams(self, config: Mapping[str, Any]) -> List[Stream]:
-
+        state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=_NO_STATE)
+        return [
+            StreamFacade.create_from_stream(
+                stream,
+                self,
+                stream.logger,
+                self._max_workers,
+                _NO_STATE,
+                ConcurrentCursor(
+                    stream.name,
+                    stream.namespace,
+                    _NO_STATE,
+                    self.message_repository,  # type: ignore  # for this source specifically, we always return `InMemoryMessageRepository`
+                    state_manager,
+                    self._cursor_field,
+                    self._cursor_boundaries,
+                )
+                if self._cursor_field
+                else NoopCursor(),
+            )
+            for stream in self._streams
+        ]
 
     @property
     def message_repository(self) -> Union[None, MessageRepository]:
-        return
+        return self._message_repository
 
     def spec(self, logger: logging.Logger) -> ConnectorSpecification:
         return ConnectorSpecification(connectionSpecification={})
@@ -49,14 +83,21 @@ class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]):
         self._source = None
         self._streams = []
         self._max_workers = 1
+        self._cursor_field = None
+        self._cursor_boundaries = None
 
     def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder":
         self._streams = streams
         return self
 
-    def set_max_workers(self, max_workers: int):
+    def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder":
         self._max_workers = max_workers
         return self
 
+    def set_incremental(self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]]) -> "StreamFacadeSourceBuilder":
+        self._cursor_field = cursor_field
+        self._cursor_boundaries = cursor_boundaries
+        return self
+
     def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource:
-        return StreamFacadeSource(self._streams, self._max_workers)
+        return StreamFacadeSource(self._streams, self._max_workers, self._cursor_field, self._cursor_boundaries)
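Read together, these changes let a test wire an incremental `StreamFacadeSource` through the builder. A minimal usage sketch based only on the builder methods shown above; the stream list is assumed to come from the caller and the cursor field name mirrors the scenarios below.

```python
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder


def build_incremental_source(streams):
    # streams -> concurrency level -> cursor field plus the ("from", "to") slice-boundary keys
    return (
        StreamFacadeSourceBuilder()
        .set_streams(streams)
        .set_max_workers(2)
        .set_incremental(CursorField("cursor_field"), ("from", "to"))
        .build(None)  # no configured catalog is needed for this helper
    )
```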
unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py
CHANGED
@@ -1,26 +1,25 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-from typing import Any, Iterable, List, Mapping, Optional, Union
+from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
 
 from airbyte_cdk.models import SyncMode
 from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
 from airbyte_cdk.sources.streams.core import StreamData
-from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
+from unit_tests.sources.file_based.scenarios.scenario_builder import IncrementalScenarioConfig, TestScenarioBuilder
 from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder
 
 
 class _MockStream(Stream):
     def __init__(
         self,
-
-        slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]],
+        slices_and_records_or_exception: Iterable[Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]]],
         name,
         json_schema,
         primary_key=None,
     ):
-        self.
-        self._slice_values_to_records = slice_values_to_records_or_exception
+        self._slices_and_records_or_exception = slices_and_records_or_exception
         self._name = name
         self._json_schema = json_schema
         self._primary_key = primary_key
@@ -32,19 +31,12 @@ class _MockStream(Stream):
         stream_slice: Optional[Mapping[str, Any]] = None,
         stream_state: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[StreamData]:
-        for
-        if
-
-
-
-
-    def _get_record_or_exception_iterable(
-        self, stream_slice: Optional[Mapping[str, Any]] = None
-    ) -> Iterable[Union[Mapping[str, Any], Exception]]:
-        if stream_slice is None:
-            return self._slice_values_to_records[None]
-        else:
-            return self._slice_values_to_records[stream_slice[self._slice_key]]
+        for _slice, records_or_exception in self._slices_and_records_or_exception:
+            if stream_slice == _slice:
+                for item in records_or_exception:
+                    if isinstance(item, Exception):
+                        raise item
+                    yield item
 
     @property
     def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -60,16 +52,16 @@ class _MockStream(Stream):
     def stream_slices(
         self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None
     ) -> Iterable[Optional[Mapping[str, Any]]]:
-        if self.
-        for
-            yield {self._slice_key: slice_value}
+        if self._slices_and_records_or_exception:
+            yield from [_slice for _slice, records_or_exception in self._slices_and_records_or_exception]
         else:
             yield None
 
 
 _stream1 = _MockStream(
-
-
+    [
+        (None, [{"id": "1"}, {"id": "2"}]),
+    ],
     "stream1",
     json_schema={
         "type": "object",
@@ -80,8 +72,9 @@ _stream1 = _MockStream(
 )
 
 _stream_raising_exception = _MockStream(
-
-
+    [
+        (None, [{"id": "1"}, ValueError("test exception")]),
+    ],
     "stream1",
     json_schema={
         "type": "object",
@@ -92,8 +85,9 @@ _stream_raising_exception = _MockStream(
 )
 
 _stream_with_primary_key = _MockStream(
-
-
+    [
+        (None, [{"id": "1"}, {"id": "2"}]),
+    ],
     "stream1",
     json_schema={
         "type": "object",
@@ -105,8 +99,9 @@ _stream_with_primary_key = _MockStream(
 )
 
 _stream2 = _MockStream(
-
-
+    [
+        (None, [{"id": "A"}, {"id": "B"}]),
+    ],
     "stream2",
     json_schema={
         "type": "object",
@@ -117,8 +112,9 @@ _stream2 = _MockStream(
 )
 
 _stream_with_single_slice = _MockStream(
-
-
+    [
+        ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
+    ],
     "stream1",
     json_schema={
         "type": "object",
@@ -129,11 +125,10 @@ _stream_with_single_slice = _MockStream(
 )
 
 _stream_with_multiple_slices = _MockStream(
-
-
-    "
-
-    },
+    [
+        ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
+        ({"slice_key": "s2"}, [{"id": "3"}, {"id": "4"}]),
+    ],
     "stream1",
     json_schema={
         "type": "object",
@@ -384,3 +379,125 @@ test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two
     )
     .build()
 )
+
+
+test_incremental_stream_with_slice_boundaries = (
+    TestScenarioBuilder()
+    .set_name("test_incremental_stream_with_slice_boundaries")
+    .set_config({})
+    .set_source_builder(
+        StreamFacadeSourceBuilder()
+        .set_streams(
+            [
+                _MockStream(
+                    [
+                        ({"from": 0, "to": 1}, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 1}]),
+                        ({"from": 1, "to": 2}, [{"id": "3", "cursor_field": 2}, {"id": "4", "cursor_field": 3}]),
+                    ],
+                    "stream1",
+                    json_schema={
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": ["null", "string"]},
+                        },
+                    },
+                )
+            ]
+        )
+        .set_incremental(CursorField("cursor_field"), ("from", "to"))
+    )
+    .set_expected_records(
+        [
+            {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
+            {"data": {"id": "2", "cursor_field": 1}, "stream": "stream1"},
+            {"stream1": {"slices": [{"start": 0, "end": 1}]}},
+            {"data": {"id": "3", "cursor_field": 2}, "stream": "stream1"},
+            {"data": {"id": "4", "cursor_field": 3}, "stream": "stream1"},
+            {"stream1": {"slices": [{"start": 0, "end": 1}, {"start": 1, "end": 2}]}},
+        ]
+    )
+    .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
+    .set_incremental_scenario_config(
+        IncrementalScenarioConfig(
+            input_state=[],
+        )
+    )
+    .build()
+)
+
+
+_NO_SLICE_BOUNDARIES = None
+test_incremental_stream_without_slice_boundaries = (
+    TestScenarioBuilder()
+    .set_name("test_incremental_stream_without_slice_boundaries")
+    .set_config({})
+    .set_source_builder(
+        StreamFacadeSourceBuilder()
+        .set_streams(
+            [
+                _MockStream(
+                    [
+                        (None, [{"id": "1", "cursor_field": 0}, {"id": "2", "cursor_field": 3}]),
+                    ],
+                    "stream1",
+                    json_schema={
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": ["null", "string"]},
+                        },
+                    },
+                )
+            ]
+        )
+        .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
+    )
+    .set_expected_records(
+        [
+            {"data": {"id": "1", "cursor_field": 0}, "stream": "stream1"},
+            {"data": {"id": "2", "cursor_field": 3}, "stream": "stream1"},
+            {"stream1": {"slices": [{"start": 0, "end": 3}]}},
+        ]
+    )
+    .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
+    .set_incremental_scenario_config(
+        IncrementalScenarioConfig(
+            input_state=[],
+        )
+    )
+    .build()
+)
+
+test_incremental_stream_with_many_slices_but_without_slice_boundaries = (
+    TestScenarioBuilder()
+    .set_name("test_incremental_stream_with_many_slices_byt_without_slice_boundaries")
+    .set_config({})
+    .set_source_builder(
+        StreamFacadeSourceBuilder()
+        .set_streams(
+            [
+                _MockStream(
+                    [
+                        ({"parent_id": 1}, [{"id": "1", "cursor_field": 0}]),
+                        ({"parent_id": 309}, [{"id": "3", "cursor_field": 0}]),
+                    ],
+                    "stream1",
+                    json_schema={
+                        "type": "object",
+                        "properties": {
+                            "id": {"type": ["null", "string"]},
+                        },
+                    },
+                )
+            ]
+        )
+        .set_incremental(CursorField("cursor_field"), _NO_SLICE_BOUNDARIES)
+    )
+    .set_expected_read_error(ValueError, "test exception")
+    .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"})
+    .set_incremental_scenario_config(
+        IncrementalScenarioConfig(
+            input_state=[],
+        )
+    )
+    .build()
+)
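The `_MockStream` helper above now takes an explicit iterable of `(slice, records_or_exception)` pairs instead of a mapping keyed by slice value. As a minimal sketch (written as if it lived inside this scenarios module; the second slice and its error message are made up for illustration), a stream whose second slice fails mid-read would be declared like this:

```python
_failing_two_slice_stream = _MockStream(
    [
        ({"slice_key": "s1"}, [{"id": "1"}, {"id": "2"}]),
        # Any Exception placed in the record list is raised by read_records when that slice is read.
        ({"slice_key": "s2"}, [{"id": "3"}, ValueError("simulated mid-slice failure")]),
    ],
    "stream1",
    json_schema={
        "type": "object",
        "properties": {"id": {"type": ["null", "string"]}},
    },
)
```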
unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py
CHANGED
@@ -11,6 +11,9 @@ from pytest import LogCaptureFixture
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario
 from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read
 from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import (
+    test_incremental_stream_with_many_slices_but_without_slice_boundaries,
+    test_incremental_stream_with_slice_boundaries,
+    test_incremental_stream_without_slice_boundaries,
     test_stream_facade_multiple_streams,
     test_stream_facade_raises_exception,
     test_stream_facade_single_stream,
@@ -43,6 +46,9 @@ scenarios = [
     test_stream_facade_single_stream_with_multiple_slices,
     test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two,
     test_stream_facade_raises_exception,
+    test_incremental_stream_with_slice_boundaries,
+    test_incremental_stream_without_slice_boundaries,
+    test_incremental_stream_with_many_slices_but_without_slice_boundaries,
 ]
 
 
unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
CHANGED
@@ -12,15 +12,31 @@ from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
+from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
 from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
+from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.utils.slice_logger import SliceLogger
 from airbyte_protocol.models import ConfiguredAirbyteStream
 from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder
 
 
+class LegacyStream(Stream):
+    def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
+        return None
+
+    def read_records(
+        self,
+        sync_mode: SyncMode,
+        cursor_field: Optional[List[str]] = None,
+        stream_slice: Optional[Mapping[str, Any]] = None,
+        stream_state: Optional[Mapping[str, Any]] = None,
+    ) -> Iterable[StreamData]:
+        yield from []
+
+
 class ConcurrentCdkSource(AbstractSource):
     def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
         self._streams = streams
@@ -31,7 +47,7 @@ class ConcurrentCdkSource(AbstractSource):
         return True, None
 
     def streams(self, config: Mapping[str, Any]) -> List[Stream]:
-        return [StreamFacade(s) for s in self._streams]
+        return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
 
     def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
         return ConnectorSpecification(connectionSpecification={})
@@ -40,7 +56,7 @@ class ConcurrentCdkSource(AbstractSource):
         return ConfiguredAirbyteCatalog(
             streams=[
                 ConfiguredAirbyteStream(
-                    stream=StreamFacade(s).as_airbyte_stream(),
+                    stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
                     sync_mode=SyncMode.full_refresh,
                     destination_sync_mode=DestinationSyncMode.overwrite,
                 )
@@ -57,7 +73,7 @@ class InMemoryPartitionGenerator(PartitionGenerator):
     def __init__(self, partitions: List[Partition]):
        self._partitions = partitions
 
-    def generate(self
+    def generate(self) -> Iterable[Partition]:
         yield from self._partitions
 
 
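As the two call sites above show, `StreamFacade` is now constructed from three pieces: the concurrent stream, a legacy `Stream` to delegate legacy behaviour to, and a `Cursor` (`NoopCursor` for plain full-refresh wrapping). A minimal sketch of that construction; the helper name and its arguments are illustrative only:

```python
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor


def to_full_refresh_facade(concurrent_stream, legacy_stream):
    # Full refresh tracks no cursor state, so NoopCursor is sufficient.
    return StreamFacade(concurrent_stream, legacy_stream, NoopCursor())
```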
unit_tests/sources/streams/concurrent/test_adapters.py
CHANGED
@@ -17,10 +17,16 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
     StreamPartitionGenerator,
 )
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
+from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
 from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.core import Stream
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
 
+_ANY_SYNC_MODE = SyncMode.full_refresh
+_ANY_STATE = {"state_key": "state_value"}
+_ANY_CURSOR_FIELD = ["a", "cursor", "key"]
+
 
 @pytest.mark.parametrize(
     "stream_availability, expected_available, expected_message",
@@ -71,11 +77,12 @@ def test_stream_partition_generator(sync_mode):
     stream_slices = [{"slice": 1}, {"slice": 2}]
     stream.stream_slices.return_value = stream_slices
 
-    partition_generator = StreamPartitionGenerator(stream, message_repository)
+    partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
 
-    partitions = list(partition_generator.generate(
+    partitions = list(partition_generator.generate())
     slices = [partition.to_slice() for partition in partitions]
     assert slices == stream_slices
+    stream.stream_slices.assert_called_once_with(sync_mode=_ANY_SYNC_MODE, cursor_field=_ANY_CURSOR_FIELD, stream_state=_ANY_STATE)
 
 
 @pytest.mark.parametrize(
@@ -95,7 +102,10 @@ def test_stream_partition(transformer, expected_records):
     stream.transformer = transformer
     message_repository = InMemoryMessageRepository()
     _slice = None
-
+    sync_mode = SyncMode.full_refresh
+    cursor_field = None
+    state = None
+    partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
 
     a_log_message = AirbyteMessage(
         type=MessageType.LOG,
@@ -128,7 +138,8 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
 
     message_repository = InMemoryMessageRepository()
     _slice = None
-
+
+    partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
 
     stream.read_records.side_effect = Exception()
 
@@ -148,7 +159,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
 def test_stream_partition_hash(_slice, expected_hash):
     stream = Mock()
     stream.name = "stream"
-    partition = StreamPartition(stream, _slice, Mock())
+    partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
 
     _hash = partition.__hash__()
     assert _hash == expected_hash
@@ -163,7 +174,9 @@ class StreamFacadeTest(unittest.TestCase):
             json_schema={"type": "object"},
             supported_sync_modes=[SyncMode.full_refresh],
         )
-        self.
+        self._legacy_stream = Mock(spec=Stream)
+        self._cursor = Mock(spec=Cursor)
+        self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
         self._logger = Mock()
         self._source = Mock()
         self._max_workers = 10
@@ -191,8 +204,14 @@ class StreamFacadeTest(unittest.TestCase):
         assert self._facade.get_json_schema() == json_schema
         self._abstract_stream.get_json_schema.assert_called_once_with()
 
-    def
-        assert
+    def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
+        assert (
+            StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
+            == self._legacy_stream.supports_incremental
+        )
+
+    def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
+        assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
 
     def test_check_availability_is_delegated_to_wrapped_stream(self):
         availability = StreamAvailable()
@@ -219,8 +238,13 @@ class StreamFacadeTest(unittest.TestCase):
         assert actual_stream_data == expected_stream_data
 
     def test_read_records_incremental(self):
-
-
+        expected_stream_data = [{"data": 1}, {"data": 2}]
+        records = [Record(data) for data in expected_stream_data]
+        self._abstract_stream.read.return_value = records
+
+        actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
+
+        assert actual_stream_data == expected_stream_data
 
     def test_create_from_stream_stream(self):
         stream = Mock()
@@ -228,7 +252,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = "id"
         stream.cursor_field = "cursor"
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
         assert facade.name == "stream"
         assert facade.cursor_field == "cursor"
@@ -240,7 +264,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = None
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
         facade._abstract_stream._primary_key is None
 
     def test_create_from_stream_with_composite_primary_key(self):
@@ -249,7 +273,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = ["id", "name"]
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
         facade._abstract_stream._primary_key == ["id", "name"]
 
     def test_create_from_stream_with_empty_list_cursor(self):
@@ -257,7 +281,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = "id"
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
         assert facade.cursor_field == []
 
@@ -267,7 +291,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = [["field", "id"]]
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
         stream = Mock()
@@ -275,7 +299,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = 123
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
         stream = Mock()
@@ -284,7 +308,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.cursor_field = ["field", "cursor"]
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_with_cursor_field_as_list(self):
         stream = Mock()
@@ -292,7 +316,7 @@ class StreamFacadeTest(unittest.TestCase):
         stream.primary_key = "id"
         stream.cursor_field = ["cursor"]
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
         assert facade.cursor_field == "cursor"
 
     def test_create_from_stream_none_message_repository(self):
@@ -302,12 +326,12 @@ class StreamFacadeTest(unittest.TestCase):
         self._source.message_repository = None
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
+            StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
 
     def test_get_error_display_message_no_display_message(self):
         self._stream.get_error_display_message.return_value = "display_message"
 
-        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
         expected_display_message = None
         e = Exception()
@@ -319,7 +343,7 @@ class StreamFacadeTest(unittest.TestCase):
     def test_get_error_display_message_with_display_message(self):
         self._stream.get_error_display_message.return_value = "display_message"
 
-        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers)
+        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
 
         expected_display_message = "display_message"
         e = ExceptionWithDisplayMessage("display_message")
@@ -338,7 +362,9 @@ class StreamFacadeTest(unittest.TestCase):
 )
 def test_get_error_display_message(exception, expected_display_message):
     stream = Mock()
-
+    legacy_stream = Mock()
+    cursor = Mock(spec=Cursor)
+    facade = StreamFacade(stream, legacy_stream, cursor)
 
     display_message = facade.get_error_display_message(exception)
 
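The partition classes gained the same trailing arguments: a sync mode, a cursor field, and the stream state are now threaded through `StreamPartitionGenerator` and `StreamPartition` down to `Stream.stream_slices` and `Stream.read_records`. A minimal sketch using only the signatures exercised above; the cursor field name and the `state` value are placeholders:

```python
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.message import InMemoryMessageRepository
from airbyte_cdk.sources.streams.concurrent.adapters import StreamPartition, StreamPartitionGenerator


def partitions_for(stream, state):
    message_repository = InMemoryMessageRepository()
    # StreamPartitionGenerator(stream, message_repository, sync_mode, cursor_field, state)
    generator = StreamPartitionGenerator(stream, message_repository, SyncMode.incremental, ["updated_at"], state)
    return list(generator.generate())
```

A single `StreamPartition` is built the same way, e.g. `StreamPartition(stream, a_slice, message_repository, SyncMode.incremental, ["updated_at"], state)`.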
unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py
CHANGED
@@ -21,12 +21,13 @@ def test_partition_generator(slices):
 
     stream = Mock()
     message_repository = Mock()
-    partitions = [StreamPartition(stream, s, message_repository) for s in slices]
-    stream.generate.return_value = iter(partitions)
-
     sync_mode = SyncMode.full_refresh
+    cursor_field = None
+    state = None
+    partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
+    stream.generate.return_value = iter(partitions)
 
-    partition_generator.generate_partitions(stream
+    partition_generator.generate_partitions(stream)
 
     actual_partitions = []
     while partition := queue.get(False):