airbyte-cdk 0.54.0__py3-none-any.whl → 0.55.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (36)
  1. airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
  3. airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
  4. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
  5. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
  6. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
  7. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
  8. airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
  10. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
  12. airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
  13. airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
  14. airbyte_cdk/sources/utils/slice_logger.py +5 -0
  15. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +35 -23
  17. unit_tests/sources/concurrent_source/__init__.py +3 -0
  18. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
  19. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
  23. unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
  24. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
  25. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
  26. unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
  27. unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
  28. unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
  29. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
  30. unit_tests/sources/streams/test_stream_read.py +1 -2
  31. unit_tests/sources/test_concurrent_source.py +105 -0
  32. unit_tests/sources/test_source_read.py +461 -0
  33. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
  34. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
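
Reading guide: the headline change in 0.55.0 is the removal of ThreadBasedConcurrentStream (file 33 above, 0 additions, 221 deletions) in favor of DefaultStream plus the new airbyte_cdk.sources.concurrent_source package (ConcurrentSource, ConcurrentReadProcessor, ThreadPoolManager, ConcurrentSourceAdapter). The test diffs below show what that means for stream construction: the per-stream max_workers, slice_logger, message_repository and timeout_seconds arguments disappear. A minimal before/after sketch based on those hunks; the Mock stand-ins are placeholders for a real partition generator and availability strategy, not CDK-provided defaults:

    import logging
    from unittest.mock import Mock

    from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy
    from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
    from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator

    # 0.54.0: ThreadBasedConcurrentStream(partition_generator=..., max_workers=1, ...,
    #         slice_logger=..., message_repository=..., timeout_seconds=300)
    # 0.55.0: concurrency, slice logging and messaging move to the source level,
    #         and the stream itself reduces to the keyword arguments below.
    stream = DefaultStream(
        partition_generator=Mock(spec=PartitionGenerator),  # stand-in for a real generator
        name="stream1",
        json_schema={"type": "object", "properties": {"id": {"type": ["null", "string"]}}},
        availability_strategy=Mock(spec=AbstractAvailabilityStrategy),  # stand-in
        primary_key=[],
        cursor_field=None,
        logger=logging.getLogger("test_logger"),
    )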

unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py
@@ -116,15 +116,14 @@ test_stream_facade_single_stream = (
     .set_expected_logs(
         {
             "read": [
-                {"level": "INFO", "message": "Starting syncing StreamFacadeSource"},
+                {"level": "INFO", "message": "Starting syncing"},
                 {"level": "INFO", "message": "Marking stream stream1 as STARTED"},
                 {"level": "INFO", "message": "Syncing stream: stream1"},
                 {"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
                 {"level": "INFO", "message": "Read 2 records from stream1 stream"},
                 {"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
                 {"level": "INFO", "message": "Finished syncing stream1"},
-                {"level": "INFO", "message": "StreamFacadeSource runtimes"},
-                {"level": "INFO", "message": "Finished syncing StreamFacadeSource"},
+                {"level": "INFO", "message": "Finished syncing"},
             ]
         }
     )

unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py
@@ -1,25 +1,23 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-
 import logging
 
 from airbyte_cdk.sources.message import InMemoryMessageRepository
+from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
-from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
-from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
 from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
 from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import (
     AlwaysAvailableAvailabilityStrategy,
     ConcurrentSourceBuilder,
     InMemoryPartition,
     InMemoryPartitionGenerator,
-    NeverLogSliceLogger,
 )
 
-_id_only_stream = ThreadBasedConcurrentStream(
-    partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
-    max_workers=1,
+_id_only_stream = DefaultStream(
+    partition_generator=InMemoryPartitionGenerator(
+        [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+    ),
     name="stream1",
     json_schema={
         "type": "object",
@@ -30,15 +28,13 @@ _id_only_stream = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=[],
     cursor_field=None,
-    slice_logger=NeverLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
-_id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
-    partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
-    max_workers=1,
+_id_only_stream_with_slice_logger = DefaultStream(
+    partition_generator=InMemoryPartitionGenerator(
+        [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+    ),
     name="stream1",
     json_schema={
         "type": "object",
@@ -49,15 +45,13 @@ _id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=[],
     cursor_field=None,
-    slice_logger=AlwaysLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
-_id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
-    partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
-    max_workers=1,
+_id_only_stream_with_primary_key = DefaultStream(
+    partition_generator=InMemoryPartitionGenerator(
+        [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+    ),
     name="stream1",
     json_schema={
         "type": "object",
@@ -68,20 +62,16 @@ _id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=["id"],
     cursor_field=None,
-    slice_logger=NeverLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
-_id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
+_id_only_stream_multiple_partitions = DefaultStream(
     partition_generator=InMemoryPartitionGenerator(
         [
-            InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
-            InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
+            InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
+            InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
         ]
     ),
-    max_workers=1,
     name="stream1",
     json_schema={
        "type": "object",
@@ -92,20 +82,16 @@ _id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=[],
     cursor_field=None,
-    slice_logger=NeverLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
-_id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream(
+_id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
     partition_generator=InMemoryPartitionGenerator(
         [
-            InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
-            InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
+            InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
+            InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
         ]
     ),
-    max_workers=2,
     name="stream1",
     json_schema={
         "type": "object",
@@ -116,17 +102,13 @@ _id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=[],
     cursor_field=None,
-    slice_logger=NeverLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
-_stream_raising_exception = ThreadBasedConcurrentStream(
+_stream_raising_exception = DefaultStream(
     partition_generator=InMemoryPartitionGenerator(
-        [InMemoryPartition("partition1", None, [Record({"id": "1"}), ValueError("test exception")])]
+        [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), ValueError("test exception")])]
     ),
-    max_workers=1,
     name="stream1",
     json_schema={
         "type": "object",
@@ -137,10 +119,7 @@ _stream_raising_exception = ThreadBasedConcurrentStream(
     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
     primary_key=[],
     cursor_field=None,
-    slice_logger=NeverLogSliceLogger(),
     logger=logging.getLogger("test_logger"),
-    message_repository=None,
-    timeout_seconds=300,
 )
 
 test_concurrent_cdk_single_stream = (
@@ -165,15 +144,14 @@ test_concurrent_cdk_single_stream = (
     .set_expected_logs(
         {
             "read": [
-                {"level": "INFO", "message": "Starting syncing ConcurrentCdkSource"},
+                {"level": "INFO", "message": "Starting syncing"},
                 {"level": "INFO", "message": "Marking stream stream1 as STARTED"},
                 {"level": "INFO", "message": "Syncing stream: stream1"},
                 {"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
                 {"level": "INFO", "message": "Read 2 records from stream1 stream"},
                 {"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
                 {"level": "INFO", "message": "Finished syncing stream1"},
-                {"level": "INFO", "message": "ConcurrentCdkSource runtimes"},
-                {"level": "INFO", "message": "Finished syncing ConcurrentCdkSource"},
+                {"level": "INFO", "message": "Finished syncing"},
             ]
         }
     )
@@ -202,11 +180,13 @@ test_concurrent_cdk_single_stream_with_primary_key = (
     .set_name("test_concurrent_cdk_single_stream_with_primary_key")
     .set_config({})
     .set_source_builder(
-        ConcurrentSourceBuilder().set_streams(
+        ConcurrentSourceBuilder()
+        .set_streams(
             [
                 _id_only_stream_with_primary_key,
             ]
         )
+        .set_message_repository(InMemoryMessageRepository())
     )
     .set_expected_records(
         [
@@ -239,14 +219,21 @@ test_concurrent_cdk_multiple_streams = (
     .set_name("test_concurrent_cdk_multiple_streams")
     .set_config({})
     .set_source_builder(
-        ConcurrentSourceBuilder().set_streams(
+        ConcurrentSourceBuilder()
+        .set_streams(
             [
                 _id_only_stream,
-                ThreadBasedConcurrentStream(
+                DefaultStream(
                     partition_generator=InMemoryPartitionGenerator(
-                        [InMemoryPartition("partition1", None, [Record({"id": "10", "key": "v1"}), Record({"id": "20", "key": "v2"})])]
+                        [
+                            InMemoryPartition(
+                                "partition1",
+                                "stream2",
+                                None,
+                                [Record({"id": "10", "key": "v1"}, "stream2"), Record({"id": "20", "key": "v2"}, "stream2")],
+                            )
+                        ]
                     ),
-                    max_workers=1,
                     name="stream2",
                     json_schema={
                         "type": "object",
@@ -258,13 +245,11 @@ test_concurrent_cdk_multiple_streams = (
                     availability_strategy=AlwaysAvailableAvailabilityStrategy(),
                     primary_key=[],
                     cursor_field=None,
-                    slice_logger=NeverLogSliceLogger(),
                     logger=logging.getLogger("test_logger"),
-                    message_repository=None,
-                    timeout_seconds=300,
                 ),
             ]
         )
+        .set_message_repository(InMemoryMessageRepository())
     )
     .set_expected_records(
         [
@@ -347,11 +332,13 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
     .set_name("test_concurrent_cdk_single_stream_multiple_partitions")
     .set_config({})
     .set_source_builder(
-        ConcurrentSourceBuilder().set_streams(
+        ConcurrentSourceBuilder()
+        .set_streams(
            [
                 _id_only_stream_multiple_partitions,
            ]
        )
+        .set_message_repository(InMemoryMessageRepository())
    )
    .set_expected_records(
        [
@@ -385,11 +372,13 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
     .set_name("test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_2")
     .set_config({})
     .set_source_builder(
-        ConcurrentSourceBuilder().set_streams(
+        ConcurrentSourceBuilder()
+        .set_streams(
            [
                 _id_only_stream_multiple_partitions_concurrency_level_two,
            ]
        )
+        .set_message_repository(InMemoryMessageRepository())
    )
    .set_expected_records(
        [
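
A pattern that repeats through every scenario above: Record and InMemoryPartition now take the owning stream's name. A minimal sketch of the new Record call, exactly as the hunks use it (the routing rationale is an inference from the new concurrent_read_processor module, not something this diff states):

    from airbyte_cdk.sources.streams.concurrent.partitions.record import Record

    # 0.54.0: Record({"id": "1"})
    # 0.55.0: the record also names its stream, which a single shared read loop
    # plausibly needs in order to attribute records to the right stream.
    record = Record({"id": "1"}, "stream1")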

unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
@@ -1,22 +1,22 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-
 import json
 import logging
 from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union
 
 from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
-from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
+from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
 from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
+from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
-from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.utils.slice_logger import SliceLogger
 from airbyte_protocol.models import ConfiguredAirbyteStream
@@ -37,17 +37,18 @@ class LegacyStream(Stream):
         yield from []
 
 
-class ConcurrentCdkSource(AbstractSource):
-    def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
+class ConcurrentCdkSource(ConcurrentSourceAdapter):
+    def __init__(self, streams: List[DefaultStream], message_repository: Optional[MessageRepository], max_workers, timeout_in_seconds):
+        concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
+        super().__init__(concurrent_source)
         self._streams = streams
-        self._message_repository = message_repository
 
     def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
         # Check is not verified because it is up to the source to implement this method
         return True, None
 
     def streams(self, config: Mapping[str, Any]) -> List[Stream]:
-        return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
+        return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]
 
     def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
         return ConnectorSpecification(connectionSpecification={})
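
The test source above now extends ConcurrentSourceAdapter and builds a ConcurrentSource up front. A sketch of that wiring under one stated assumption: the two leading integers are taken to be a worker count and an initial partition-generation count, but the hunk only shows the literal values 1, 1:

    import logging

    from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
    from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
    from airbyte_cdk.sources.message import InMemoryMessageRepository
    from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger

    logger = logging.getLogger("my_source")

    # Mirrors ConcurrentCdkSource.__init__ in the hunk above; the meaning of the
    # first two positional arguments is an assumption, not shown in this diff.
    concurrent_source = ConcurrentSource.create(1, 1, logger, DebugSliceLogger(), InMemoryMessageRepository())


    class MySource(ConcurrentSourceAdapter):  # hypothetical subclass, not part of this diff
        def __init__(self) -> None:
            super().__init__(concurrent_source)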

@@ -56,7 +57,7 @@ class ConcurrentCdkSource(AbstractSource):
         return ConfiguredAirbyteCatalog(
             streams=[
                 ConfiguredAirbyteStream(
-                    stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
+                    stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
                     sync_mode=SyncMode.full_refresh,
                     destination_sync_mode=DestinationSyncMode.overwrite,
                 )
@@ -78,10 +79,15 @@ class InMemoryPartitionGenerator(PartitionGenerator):
 
 
 class InMemoryPartition(Partition):
-    def __init__(self, name, _slice, records):
+    def stream_name(self) -> str:
+        return self._stream_name
+
+    def __init__(self, name, stream_name, _slice, records):
         self._name = name
+        self._stream_name = stream_name
         self._slice = _slice
         self._records = records
+        self._is_closed = False
 
     def read(self) -> Iterable[Record]:
         for record_or_exception in self._records:
@@ -101,19 +107,22 @@ class InMemoryPartition(Partition):
         else:
             return hash(self._name)
 
+    def close(self) -> None:
+        self._is_closed = True
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
 
 class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]):
     def __init__(self):
-        self._streams: List[ThreadBasedConcurrentStream] = []
+        self._streams: List[DefaultStream] = []
         self._message_repository = None
 
     def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> ConcurrentCdkSource:
-        for stream in self._streams:
-            if not stream._message_repository:
-                stream._message_repository = self._message_repository
-        return ConcurrentCdkSource(self._streams, self._message_repository)
+        return ConcurrentCdkSource(self._streams, self._message_repository, 1, 1)
 
-    def set_streams(self, streams: List[ThreadBasedConcurrentStream]) -> "ConcurrentSourceBuilder":
+    def set_streams(self, streams: List[DefaultStream]) -> "ConcurrentSourceBuilder":
         self._streams = streams
         return self
 
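
InMemoryPartition doubles as a readable spec for the widened Partition interface (partitions/partition.py gains 23 lines in the file list): implementers now provide stream_name() plus close() and is_closed() lifecycle hooks alongside read() and to_slice(). A bare-bones conforming partition modeled on the test double above; the class itself is illustrative, not CDK code:

    from typing import Any, Iterable, List, Mapping, Optional

    from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
    from airbyte_cdk.sources.streams.concurrent.partitions.record import Record


    class ListPartition(Partition):  # hypothetical example implementation
        def __init__(self, stream_name: str, _slice: Optional[Mapping[str, Any]], records: List[Record]):
            self._stream_name = stream_name
            self._slice = _slice
            self._records = records
            self._is_closed = False

        def read(self) -> Iterable[Record]:
            yield from self._records

        def to_slice(self) -> Optional[Mapping[str, Any]]:
            return self._slice

        def stream_name(self) -> str:
            # New in 0.55.0: a partition identifies the stream it belongs to.
            return self._stream_name

        def close(self) -> None:
            # New in 0.55.0: called once the partition has been fully read.
            self._is_closed = True

        def is_closed(self) -> bool:
            return self._is_closed

        def __hash__(self) -> int:
            return hash((self._stream_name, str(self._slice)))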

unit_tests/sources/streams/concurrent/test_adapters.py
@@ -1,7 +1,7 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-
+import logging
 import unittest
 from unittest.mock import Mock
 
@@ -17,15 +17,18 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
     StreamPartitionGenerator,
 )
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
-from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
+from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
 from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
 from airbyte_cdk.sources.streams.core import Stream
+from airbyte_cdk.sources.utils.slice_logger import SliceLogger
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
 
 _ANY_SYNC_MODE = SyncMode.full_refresh
 _ANY_STATE = {"state_key": "state_value"}
 _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
+_STREAM_NAME = "stream"
+_ANY_CURSOR = Mock(spec=Cursor)
 
 
 @pytest.mark.parametrize(
@@ -77,7 +80,7 @@ def test_stream_partition_generator(sync_mode):
     stream_slices = [{"slice": 1}, {"slice": 2}]
     stream.stream_slices.return_value = stream_slices
 
-    partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+    partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
 
     partitions = list(partition_generator.generate())
     slices = [partition.to_slice() for partition in partitions]
@@ -88,16 +91,21 @@
 @pytest.mark.parametrize(
     "transformer, expected_records",
     [
-        pytest.param(TypeTransformer(TransformConfig.NoTransform), [Record({"data": "1"}), Record({"data": "2"})], id="test_no_transform"),
+        pytest.param(
+            TypeTransformer(TransformConfig.NoTransform),
+            [Record({"data": "1"}, _STREAM_NAME), Record({"data": "2"}, _STREAM_NAME)],
+            id="test_no_transform",
+        ),
         pytest.param(
             TypeTransformer(TransformConfig.DefaultSchemaNormalization),
-            [Record({"data": 1}), Record({"data": 2})],
+            [Record({"data": 1}, _STREAM_NAME), Record({"data": 2}, _STREAM_NAME)],
             id="test_default_transform",
         ),
     ],
 )
 def test_stream_partition(transformer, expected_records):
     stream = Mock()
+    stream.name = _STREAM_NAME
     stream.get_json_schema.return_value = {"type": "object", "properties": {"data": {"type": ["integer"]}}}
     stream.transformer = transformer
     message_repository = InMemoryMessageRepository()
@@ -105,7 +113,7 @@ def test_stream_partition(transformer, expected_records):
     sync_mode = SyncMode.full_refresh
     cursor_field = None
     state = None
-    partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
+    partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state, _ANY_CURSOR)
 
     a_log_message = AirbyteMessage(
         type=MessageType.LOG,
@@ -139,7 +147,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_message):
     message_repository = InMemoryMessageRepository()
     _slice = None
 
-    partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+    partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
 
     stream.read_records.side_effect = Exception()
 
@@ -159,7 +167,7 @@ def test_stream_partition_hash(_slice, expected_hash):
 def test_stream_partition_hash(_slice, expected_hash):
     stream = Mock()
     stream.name = "stream"
-    partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+    partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)
 
     _hash = partition.__hash__()
     assert _hash == expected_hash
@@ -176,10 +184,11 @@ class StreamFacadeTest(unittest.TestCase):
         )
         self._legacy_stream = Mock(spec=Stream)
         self._cursor = Mock(spec=Cursor)
-        self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
         self._logger = Mock()
+        self._slice_logger = Mock()
+        self._slice_logger.should_log_slice_message.return_value = False
+        self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor, self._slice_logger, self._logger)
         self._source = Mock()
-        self._max_workers = 10
 
         self._stream = Mock()
         self._stream.primary_key = "id"
@@ -206,12 +215,16 @@ class StreamFacadeTest(unittest.TestCase):
 
     def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
         assert (
-            StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
+            StreamFacade(
+                self._abstract_stream, self._legacy_stream, _ANY_CURSOR, Mock(spec=SliceLogger), Mock(spec=logging.Logger)
+            ).supports_incremental
             == self._legacy_stream.supports_incremental
         )
 
     def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
-        assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
+        assert StreamFacade(
+            self._abstract_stream, self._legacy_stream, Mock(spec=Cursor), Mock(spec=SliceLogger), Mock(spec=logging.Logger)
+        ).supports_incremental
 
     def test_check_availability_is_delegated_to_wrapped_stream(self):
         availability = StreamAvailable()
@@ -221,8 +234,11 @@
 
     def test_full_refresh(self):
         expected_stream_data = [{"data": 1}, {"data": 2}]
-        records = [Record(data) for data in expected_stream_data]
-        self._abstract_stream.read.return_value = records
+        records = [Record(data, "stream") for data in expected_stream_data]
+
+        partition = Mock()
+        partition.read.return_value = records
+        self._abstract_stream.generate_partitions.return_value = [partition]
 
         actual_stream_data = list(self._facade.read_records(SyncMode.full_refresh, None, None, None))
 
@@ -230,8 +246,10 @@
 
     def test_read_records_full_refresh(self):
         expected_stream_data = [{"data": 1}, {"data": 2}]
-        records = [Record(data) for data in expected_stream_data]
-        self._abstract_stream.read.return_value = records
+        records = [Record(data, "stream") for data in expected_stream_data]
+        partition = Mock()
+        partition.read.return_value = records
+        self._abstract_stream.generate_partitions.return_value = [partition]
 
         actual_stream_data = list(self._facade.read_full_refresh(None, None, None))
 
@@ -239,8 +257,10 @@
 
     def test_read_records_incremental(self):
         expected_stream_data = [{"data": 1}, {"data": 2}]
-        records = [Record(data) for data in expected_stream_data]
-        self._abstract_stream.read.return_value = records
+        records = [Record(data, "stream") for data in expected_stream_data]
+        partition = Mock()
+        partition.read.return_value = records
+        self._abstract_stream.generate_partitions.return_value = [partition]
 
         actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))
 
@@ -252,7 +272,7 @@
         stream.primary_key = "id"
         stream.cursor_field = "cursor"
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
         assert facade.name == "stream"
         assert facade.cursor_field == "cursor"
@@ -264,8 +284,8 @@
         stream.primary_key = None
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
-        facade._abstract_stream._primary_key is None
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
+        assert facade._abstract_stream._primary_key == []
 
     def test_create_from_stream_with_composite_primary_key(self):
         stream = Mock()
@@ -273,15 +293,15 @@
         stream.primary_key = ["id", "name"]
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
-        facade._abstract_stream._primary_key == ["id", "name"]
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
+        assert facade._abstract_stream._primary_key == ["id", "name"]
 
     def test_create_from_stream_with_empty_list_cursor(self):
         stream = Mock()
         stream.primary_key = "id"
         stream.cursor_field = []
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
         assert facade.cursor_field == []
 
@@ -291,7 +311,7 @@
         stream.primary_key = [["field", "id"]]
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
         stream = Mock()
@@ -299,7 +319,7 @@
         stream.primary_key = 123
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
         stream = Mock()
@@ -308,7 +328,7 @@
         stream.cursor_field = ["field", "cursor"]
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+            StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
     def test_create_from_stream_with_cursor_field_as_list(self):
         stream = Mock()
@@ -316,7 +336,7 @@
         stream.primary_key = "id"
         stream.cursor_field = ["cursor"]
 
-        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+        facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
         assert facade.cursor_field == "cursor"
 
     def test_create_from_stream_none_message_repository(self):
@@ -326,12 +346,12 @@
         self._source.message_repository = None
 
         with self.assertRaises(ValueError):
-            StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
+            StreamFacade.create_from_stream(self._stream, self._source, self._logger, {}, self._cursor)
 
     def test_get_error_display_message_no_display_message(self):
         self._stream.get_error_display_message.return_value = "display_message"
 
-        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
         expected_display_message = None
         e = Exception()
@@ -343,7 +363,7 @@
     def test_get_error_display_message_with_display_message(self):
         self._stream.get_error_display_message.return_value = "display_message"
 
-        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+        facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)
 
         expected_display_message = "display_message"
         e = ExceptionWithDisplayMessage("display_message")
@@ -364,7 +384,7 @@ def test_get_error_display_message(exception, expected_display_message):
     stream = Mock()
     legacy_stream = Mock()
     cursor = Mock(spec=Cursor)
-    facade = StreamFacade(stream, legacy_stream, cursor)
+    facade = StreamFacade(stream, legacy_stream, cursor, Mock().Mock(), Mock())
 
     display_message = facade.get_error_display_message(exception)
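
Taken together, these tests pin down the new StreamFacade surface: the constructor gains slice-logger and logger parameters, and create_from_stream drops max_workers. A sketch of the updated create_from_stream call, using mocks the way the tests above do (the state dict and mock attribute values are illustrative):

    import logging
    from unittest.mock import Mock

    from airbyte_cdk.sources.streams import Stream
    from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
    from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor

    logger = logging.getLogger("test")
    stream = Mock(spec=Stream)
    stream.name = "stream"
    stream.primary_key = "id"
    stream.cursor_field = "cursor"
    source = Mock()  # must expose a non-None message_repository

    # 0.54.0: StreamFacade.create_from_stream(stream, source, logger, max_workers, state, cursor)
    # 0.55.0: max_workers is gone; concurrency is configured on the ConcurrentSource instead.
    facade = StreamFacade.create_from_stream(stream, source, logger, {"state_key": "state_value"}, NoopCursor())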