airbyte-cdk 0.54.0__py3-none-any.whl → 0.55.0__py3-none-any.whl

Files changed (36)
  1. airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
  3. airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
  4. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
  5. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
  6. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
  7. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
  8. airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
  10. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
  12. airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
  13. airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
  14. airbyte_cdk/sources/utils/slice_logger.py +5 -0
  15. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +35 -23
  17. unit_tests/sources/concurrent_source/__init__.py +3 -0
  18. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
  19. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
  23. unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
  24. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
  25. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
  26. unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
  27. unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
  28. unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
  29. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
  30. unit_tests/sources/streams/test_stream_read.py +1 -2
  31. unit_tests/sources/test_concurrent_source.py +105 -0
  32. unit_tests/sources/test_source_read.py +461 -0
  33. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
  34. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
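Reading the file list: `thread_based_concurrent_stream.py` is deleted outright (221 lines removed), and a new `airbyte_cdk/sources/concurrent_source/` package (ConcurrentSource, ConcurrentReadProcessor, ThreadPoolManager, ConcurrentSourceAdapter) takes over scheduling, with the slimmer `DefaultStream` replacing the per-stream orchestrator. A minimal sketch of the new wiring, condensed from the test-source diff further down; the roles of the two integer arguments to `ConcurrentSource.create` are an assumption inferred from the literal `create(1, 1, logger, slice_logger, message_repository)` call in that diff:

```python
# Hedged sketch: a source now subclasses ConcurrentSourceAdapter and hands it a
# ConcurrentSource, instead of each stream managing its own thread pool.
import logging
from typing import Any, List, Mapping, Optional, Tuple

from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
from airbyte_cdk.sources.message import InMemoryMessageRepository
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger


class MyConcurrentSource(ConcurrentSourceAdapter):
    def __init__(self) -> None:
        concurrent_source = ConcurrentSource.create(
            1,  # assumed: number of worker threads (the test source passes 1)
            1,  # assumed: initial number of partitions to generate
            logging.getLogger("airbyte"),
            DebugSliceLogger(),  # the test diff uses a NeverLogSliceLogger test helper here
            InMemoryMessageRepository(),
        )
        super().__init__(concurrent_source)

    def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
        return True, None

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        return []  # wrap each concurrent stream in a StreamFacade here, as the test source does
```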
unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py
@@ -116,15 +116,14 @@ test_stream_facade_single_stream = (
  .set_expected_logs(
  {
  "read": [
- {"level": "INFO", "message": "Starting syncing StreamFacadeSource"},
+ {"level": "INFO", "message": "Starting syncing"},
  {"level": "INFO", "message": "Marking stream stream1 as STARTED"},
  {"level": "INFO", "message": "Syncing stream: stream1"},
  {"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
  {"level": "INFO", "message": "Read 2 records from stream1 stream"},
  {"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
  {"level": "INFO", "message": "Finished syncing stream1"},
- {"level": "INFO", "message": "StreamFacadeSource runtimes"},
- {"level": "INFO", "message": "Finished syncing StreamFacadeSource"},
+ {"level": "INFO", "message": "Finished syncing"},
  ]
  }
  )

unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py
@@ -1,25 +1,23 @@
  #
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #
-
  import logging

  from airbyte_cdk.sources.message import InMemoryMessageRepository
+ from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
- from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
- from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger
  from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder
  from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import (
  AlwaysAvailableAvailabilityStrategy,
  ConcurrentSourceBuilder,
  InMemoryPartition,
  InMemoryPartitionGenerator,
- NeverLogSliceLogger,
  )

- _id_only_stream = ThreadBasedConcurrentStream(
- partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
- max_workers=1,
+ _id_only_stream = DefaultStream(
+ partition_generator=InMemoryPartitionGenerator(
+ [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+ ),
  name="stream1",
  json_schema={
  "type": "object",
@@ -30,15 +28,13 @@ _id_only_stream = ThreadBasedConcurrentStream(
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

- _id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
- partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
- max_workers=1,
+ _id_only_stream_with_slice_logger = DefaultStream(
+ partition_generator=InMemoryPartitionGenerator(
+ [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+ ),
  name="stream1",
  json_schema={
  "type": "object",
@@ -49,15 +45,13 @@ _id_only_stream_with_slice_logger = ThreadBasedConcurrentStream(
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=AlwaysLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

- _id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
- partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]),
- max_workers=1,
+ _id_only_stream_with_primary_key = DefaultStream(
+ partition_generator=InMemoryPartitionGenerator(
+ [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")])]
+ ),
  name="stream1",
  json_schema={
  "type": "object",
@@ -68,20 +62,16 @@ _id_only_stream_with_primary_key = ThreadBasedConcurrentStream(
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=["id"],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

- _id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
+ _id_only_stream_multiple_partitions = DefaultStream(
  partition_generator=InMemoryPartitionGenerator(
  [
- InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
- InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
+ InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
+ InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
  ]
  ),
- max_workers=1,
  name="stream1",
  json_schema={
  "type": "object",
@@ -92,20 +82,16 @@ _id_only_stream_multiple_partitions = ThreadBasedConcurrentStream(
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

- _id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream(
+ _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream(
  partition_generator=InMemoryPartitionGenerator(
  [
- InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]),
- InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]),
+ InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, "stream1"), Record({"id": "2"}, "stream1")]),
+ InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, "stream1"), Record({"id": "4"}, "stream1")]),
  ]
  ),
- max_workers=2,
  name="stream1",
  json_schema={
  "type": "object",
@@ -116,17 +102,13 @@ _id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurren
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

- _stream_raising_exception = ThreadBasedConcurrentStream(
+ _stream_raising_exception = DefaultStream(
  partition_generator=InMemoryPartitionGenerator(
- [InMemoryPartition("partition1", None, [Record({"id": "1"}), ValueError("test exception")])]
+ [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1"), ValueError("test exception")])]
  ),
- max_workers=1,
  name="stream1",
  json_schema={
  "type": "object",
@@ -137,10 +119,7 @@ _stream_raising_exception = ThreadBasedConcurrentStream(
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  )

  test_concurrent_cdk_single_stream = (
@@ -165,15 +144,14 @@ test_concurrent_cdk_single_stream = (
  .set_expected_logs(
  {
  "read": [
- {"level": "INFO", "message": "Starting syncing ConcurrentCdkSource"},
+ {"level": "INFO", "message": "Starting syncing"},
  {"level": "INFO", "message": "Marking stream stream1 as STARTED"},
  {"level": "INFO", "message": "Syncing stream: stream1"},
  {"level": "INFO", "message": "Marking stream stream1 as RUNNING"},
  {"level": "INFO", "message": "Read 2 records from stream1 stream"},
  {"level": "INFO", "message": "Marking stream stream1 as STOPPED"},
  {"level": "INFO", "message": "Finished syncing stream1"},
- {"level": "INFO", "message": "ConcurrentCdkSource runtimes"},
- {"level": "INFO", "message": "Finished syncing ConcurrentCdkSource"},
+ {"level": "INFO", "message": "Finished syncing"},
  ]
  }
  )
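The fixture migration above follows one pattern: `ThreadBasedConcurrentStream` becomes `DefaultStream`; the `max_workers`, `slice_logger`, `message_repository`, and `timeout_seconds` kwargs are gone (concurrency and message routing now belong to the source); and both `Record` and the `InMemoryPartition` test helper gain a stream-name argument. A condensed sketch of the new construction, reusing the test helpers this file imports:

```python
# Sketch mirroring the fixtures above. InMemoryPartition, InMemoryPartitionGenerator,
# and AlwaysAvailableAvailabilityStrategy are unit-test helpers, not CDK API.
import logging

from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import (
    AlwaysAvailableAvailabilityStrategy,
    InMemoryPartition,
    InMemoryPartitionGenerator,
)

stream = DefaultStream(
    partition_generator=InMemoryPartitionGenerator(
        # Record now takes (data, stream_name); InMemoryPartition now takes
        # (name, stream_name, slice, records).
        [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, "stream1")])]
    ),
    name="stream1",
    json_schema={"type": "object"},  # trimmed; the fixtures define full properties
    availability_strategy=AlwaysAvailableAvailabilityStrategy(),
    primary_key=[],
    cursor_field=None,
    logger=logging.getLogger("test_logger"),
    # dropped relative to 0.54.0: max_workers, slice_logger, message_repository, timeout_seconds
)
```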
@@ -202,11 +180,13 @@ test_concurrent_cdk_single_stream_with_primary_key = (
  .set_name("test_concurrent_cdk_single_stream_with_primary_key")
  .set_config({})
  .set_source_builder(
- ConcurrentSourceBuilder().set_streams(
+ ConcurrentSourceBuilder()
+ .set_streams(
  [
  _id_only_stream_with_primary_key,
  ]
  )
+ .set_message_repository(InMemoryMessageRepository())
  )
  .set_expected_records(
  [
@@ -239,14 +219,21 @@ test_concurrent_cdk_multiple_streams = (
  .set_name("test_concurrent_cdk_multiple_streams")
  .set_config({})
  .set_source_builder(
- ConcurrentSourceBuilder().set_streams(
+ ConcurrentSourceBuilder()
+ .set_streams(
  [
  _id_only_stream,
- ThreadBasedConcurrentStream(
+ DefaultStream(
  partition_generator=InMemoryPartitionGenerator(
- [InMemoryPartition("partition1", None, [Record({"id": "10", "key": "v1"}), Record({"id": "20", "key": "v2"})])]
+ [
+ InMemoryPartition(
+ "partition1",
+ "stream2",
+ None,
+ [Record({"id": "10", "key": "v1"}, "stream2"), Record({"id": "20", "key": "v2"}, "stream2")],
+ )
+ ]
  ),
- max_workers=1,
  name="stream2",
  json_schema={
  "type": "object",
@@ -258,13 +245,11 @@ test_concurrent_cdk_multiple_streams = (
  availability_strategy=AlwaysAvailableAvailabilityStrategy(),
  primary_key=[],
  cursor_field=None,
- slice_logger=NeverLogSliceLogger(),
  logger=logging.getLogger("test_logger"),
- message_repository=None,
- timeout_seconds=300,
  ),
  ]
  )
+ .set_message_repository(InMemoryMessageRepository())
  )
  .set_expected_records(
  [
@@ -347,11 +332,13 @@ test_concurrent_cdk_single_stream_multiple_partitions = (
  .set_name("test_concurrent_cdk_single_stream_multiple_partitions")
  .set_config({})
  .set_source_builder(
- ConcurrentSourceBuilder().set_streams(
+ ConcurrentSourceBuilder()
+ .set_streams(
  [
  _id_only_stream_multiple_partitions,
  ]
  )
+ .set_message_repository(InMemoryMessageRepository())
  )
  .set_expected_records(
  [
@@ -385,11 +372,13 @@ test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = (
  .set_name("test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_2")
  .set_config({})
  .set_source_builder(
- ConcurrentSourceBuilder().set_streams(
+ ConcurrentSourceBuilder()
+ .set_streams(
  [
  _id_only_stream_multiple_partitions_concurrency_level_two,
  ]
  )
+ .set_message_repository(InMemoryMessageRepository())
  )
  .set_expected_records(
  [
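The other recurring change in these scenarios: the message repository is no longer threaded through each stream; the builder now attaches a single repository for the whole source (see the `.set_message_repository(InMemoryMessageRepository())` additions above). A sketch of the resulting builder chain, using the same test-helper API:

```python
# ConcurrentSourceBuilder is the unit-test fixture defined in the next file's diff,
# not CDK API; _id_only_stream is one of the DefaultStream fixtures above.
from airbyte_cdk.sources.message import InMemoryMessageRepository

source_builder = (
    ConcurrentSourceBuilder()
    .set_streams([_id_only_stream])
    .set_message_repository(InMemoryMessageRepository())  # one repository per source, not per stream
)
```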

unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py
@@ -1,22 +1,22 @@
  #
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #
-
  import json
  import logging
  from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union

  from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode
- from airbyte_cdk.sources import AbstractSource
+ from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
+ from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
  from airbyte_cdk.sources.message import MessageRepository
  from airbyte_cdk.sources.streams import Stream
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable
  from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
+ from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
- from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
  from airbyte_cdk.sources.streams.core import StreamData
  from airbyte_cdk.sources.utils.slice_logger import SliceLogger
  from airbyte_protocol.models import ConfiguredAirbyteStream
@@ -37,17 +37,18 @@ class LegacyStream(Stream):
  yield from []


- class ConcurrentCdkSource(AbstractSource):
- def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]):
+ class ConcurrentCdkSource(ConcurrentSourceAdapter):
+ def __init__(self, streams: List[DefaultStream], message_repository: Optional[MessageRepository], max_workers, timeout_in_seconds):
+ concurrent_source = ConcurrentSource.create(1, 1, streams[0]._logger, NeverLogSliceLogger(), message_repository)
+ super().__init__(concurrent_source)
  self._streams = streams
- self._message_repository = message_repository

  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
  # Check is not verified because it is up to the source to implement this method
  return True, None

  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
- return [StreamFacade(s, LegacyStream(), NoopCursor()) for s in self._streams]
+ return [StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger) for s in self._streams]

  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
  return ConnectorSpecification(connectionSpecification={})
@@ -56,7 +57,7 @@ class ConcurrentCdkSource(AbstractSource):
  return ConfiguredAirbyteCatalog(
  streams=[
  ConfiguredAirbyteStream(
- stream=StreamFacade(s, LegacyStream(), NoopCursor()).as_airbyte_stream(),
+ stream=StreamFacade(s, LegacyStream(), NoopCursor(), NeverLogSliceLogger(), s._logger).as_airbyte_stream(),
  sync_mode=SyncMode.full_refresh,
  destination_sync_mode=DestinationSyncMode.overwrite,
  )
@@ -78,10 +79,15 @@ class InMemoryPartitionGenerator(PartitionGenerator):


  class InMemoryPartition(Partition):
- def __init__(self, name, _slice, records):
+ def stream_name(self) -> str:
+ return self._stream_name
+
+ def __init__(self, name, stream_name, _slice, records):
  self._name = name
+ self._stream_name = stream_name
  self._slice = _slice
  self._records = records
+ self._is_closed = False

  def read(self) -> Iterable[Record]:
  for record_or_exception in self._records:
@@ -101,19 +107,22 @@ class InMemoryPartition(Partition):
  else:
  return hash(self._name)

+ def close(self) -> None:
+ self._is_closed = True
+
+ def is_closed(self) -> bool:
+ return self._is_closed
+

  class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]):
  def __init__(self):
- self._streams: List[ThreadBasedConcurrentStream] = []
+ self._streams: List[DefaultStream] = []
  self._message_repository = None

  def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> ConcurrentCdkSource:
- for stream in self._streams:
- if not stream._message_repository:
- stream._message_repository = self._message_repository
- return ConcurrentCdkSource(self._streams, self._message_repository)
+ return ConcurrentCdkSource(self._streams, self._message_repository, 1, 1)

- def set_streams(self, streams: List[ThreadBasedConcurrentStream]) -> "ConcurrentSourceBuilder":
+ def set_streams(self, streams: List[DefaultStream]) -> "ConcurrentSourceBuilder":
  self._streams = streams
  return self

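Per this diff, `Partition` implementations now expose three more methods alongside `read()` and `to_slice()`: `stream_name()`, `close()`, and `is_closed()` (note `partitions/partition.py` growing by 23 lines in the file list). A minimal sketch of a conforming partition, modeled on the `InMemoryPartition` helper above; the comments on why each hook exists are assumptions:

```python
from typing import Any, Iterable, List, Mapping, Optional

from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record


class ListPartition(Partition):
    def __init__(self, name: str, stream_name: str, _slice: Optional[Mapping[str, Any]], records: List[Record]):
        self._name = name
        self._stream_name = stream_name
        self._slice = _slice
        self._records = records
        self._is_closed = False

    def read(self) -> Iterable[Record]:
        yield from self._records

    def to_slice(self) -> Optional[Mapping[str, Any]]:
        return self._slice

    def stream_name(self) -> str:
        # assumed purpose: lets the shared read processor route records per stream
        return self._stream_name

    def close(self) -> None:
        # assumed purpose: invoked once the partition has been fully read
        self._is_closed = True

    def is_closed(self) -> bool:
        return self._is_closed

    def __hash__(self) -> int:
        return hash(self._name)
```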

unit_tests/sources/streams/concurrent/test_adapters.py
@@ -1,7 +1,7 @@
  #
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #
-
+ import logging
  import unittest
  from unittest.mock import Mock

@@ -17,15 +17,18 @@ from airbyte_cdk.sources.streams.concurrent.adapters import (
  StreamPartitionGenerator,
  )
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE, StreamAvailable, StreamUnavailable
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
  from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
  from airbyte_cdk.sources.streams.core import Stream
+ from airbyte_cdk.sources.utils.slice_logger import SliceLogger
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

  _ANY_SYNC_MODE = SyncMode.full_refresh
  _ANY_STATE = {"state_key": "state_value"}
  _ANY_CURSOR_FIELD = ["a", "cursor", "key"]
+ _STREAM_NAME = "stream"
+ _ANY_CURSOR = Mock(spec=Cursor)


  @pytest.mark.parametrize(
@@ -77,7 +80,7 @@ def test_stream_partition_generator(sync_mode):
  stream_slices = [{"slice": 1}, {"slice": 2}]
  stream.stream_slices.return_value = stream_slices

- partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+ partition_generator = StreamPartitionGenerator(stream, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)

  partitions = list(partition_generator.generate())
  slices = [partition.to_slice() for partition in partitions]
@@ -88,16 +91,21 @@
  @pytest.mark.parametrize(
  "transformer, expected_records",
  [
- pytest.param(TypeTransformer(TransformConfig.NoTransform), [Record({"data": "1"}), Record({"data": "2"})], id="test_no_transform"),
+ pytest.param(
+ TypeTransformer(TransformConfig.NoTransform),
+ [Record({"data": "1"}, _STREAM_NAME), Record({"data": "2"}, _STREAM_NAME)],
+ id="test_no_transform",
+ ),
  pytest.param(
  TypeTransformer(TransformConfig.DefaultSchemaNormalization),
- [Record({"data": 1}), Record({"data": 2})],
+ [Record({"data": 1}, _STREAM_NAME), Record({"data": 2}, _STREAM_NAME)],
  id="test_default_transform",
  ),
  ],
  )
  def test_stream_partition(transformer, expected_records):
  stream = Mock()
+ stream.name = _STREAM_NAME
  stream.get_json_schema.return_value = {"type": "object", "properties": {"data": {"type": ["integer"]}}}
  stream.transformer = transformer
  message_repository = InMemoryMessageRepository()
@@ -105,7 +113,7 @@ def test_stream_partition(transformer, expected_records):
  sync_mode = SyncMode.full_refresh
  cursor_field = None
  state = None
- partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state)
+ partition = StreamPartition(stream, _slice, message_repository, sync_mode, cursor_field, state, _ANY_CURSOR)

  a_log_message = AirbyteMessage(
  type=MessageType.LOG,
@@ -139,7 +147,7 @@ def test_stream_partition_raising_exception(exception_type, expected_display_mes
  message_repository = InMemoryMessageRepository()
  _slice = None

- partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+ partition = StreamPartition(stream, _slice, message_repository, _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)

  stream.read_records.side_effect = Exception()

@@ -159,7 +167,7 @@
  def test_stream_partition_hash(_slice, expected_hash):
  stream = Mock()
  stream.name = "stream"
- partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE)
+ partition = StreamPartition(stream, _slice, Mock(), _ANY_SYNC_MODE, _ANY_CURSOR_FIELD, _ANY_STATE, _ANY_CURSOR)

  _hash = partition.__hash__()
  assert _hash == expected_hash
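Every `StreamPartition` and `StreamPartitionGenerator` call in these tests gains a trailing `Cursor` argument. A hedged sketch of the updated positional order; the comment on the cursor's role is an assumption, since the diff only shows it being passed through:

```python
from unittest.mock import Mock

from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.message import InMemoryMessageRepository
from airbyte_cdk.sources.streams.concurrent.adapters import StreamPartition
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor

partition = StreamPartition(
    Mock(),                       # the legacy Stream being wrapped
    {"slice": 1},                 # the stream slice this partition covers
    InMemoryMessageRepository(),  # destination for log messages emitted while reading
    SyncMode.full_refresh,
    None,                         # cursor_field
    None,                         # state
    Mock(spec=Cursor),            # new in 0.55.0; presumably observes records for state tracking
)
```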
@@ -176,10 +184,11 @@ class StreamFacadeTest(unittest.TestCase):
  )
  self._legacy_stream = Mock(spec=Stream)
  self._cursor = Mock(spec=Cursor)
- self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor)
  self._logger = Mock()
+ self._slice_logger = Mock()
+ self._slice_logger.should_log_slice_message.return_value = False
+ self._facade = StreamFacade(self._abstract_stream, self._legacy_stream, self._cursor, self._slice_logger, self._logger)
  self._source = Mock()
- self._max_workers = 10

  self._stream = Mock()
  self._stream.primary_key = "id"
@@ -206,12 +215,16 @@ class StreamFacadeTest(unittest.TestCase):

  def test_given_cursor_is_noop_when_supports_incremental_then_return_legacy_stream_response(self):
  assert (
- StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=NoopCursor)).supports_incremental
+ StreamFacade(
+ self._abstract_stream, self._legacy_stream, _ANY_CURSOR, Mock(spec=SliceLogger), Mock(spec=logging.Logger)
+ ).supports_incremental
  == self._legacy_stream.supports_incremental
  )

  def test_given_cursor_is_not_noop_when_supports_incremental_then_return_true(self):
- assert StreamFacade(self._abstract_stream, self._legacy_stream, Mock(spec=Cursor)).supports_incremental
+ assert StreamFacade(
+ self._abstract_stream, self._legacy_stream, Mock(spec=Cursor), Mock(spec=SliceLogger), Mock(spec=logging.Logger)
+ ).supports_incremental

  def test_check_availability_is_delegated_to_wrapped_stream(self):
  availability = StreamAvailable()
@@ -221,8 +234,11 @@ class StreamFacadeTest(unittest.TestCase):

  def test_full_refresh(self):
  expected_stream_data = [{"data": 1}, {"data": 2}]
- records = [Record(data) for data in expected_stream_data]
- self._abstract_stream.read.return_value = records
+ records = [Record(data, "stream") for data in expected_stream_data]
+
+ partition = Mock()
+ partition.read.return_value = records
+ self._abstract_stream.generate_partitions.return_value = [partition]

  actual_stream_data = list(self._facade.read_records(SyncMode.full_refresh, None, None, None))

@@ -230,8 +246,10 @@ class StreamFacadeTest(unittest.TestCase):

  def test_read_records_full_refresh(self):
  expected_stream_data = [{"data": 1}, {"data": 2}]
- records = [Record(data) for data in expected_stream_data]
- self._abstract_stream.read.return_value = records
+ records = [Record(data, "stream") for data in expected_stream_data]
+ partition = Mock()
+ partition.read.return_value = records
+ self._abstract_stream.generate_partitions.return_value = [partition]

  actual_stream_data = list(self._facade.read_full_refresh(None, None, None))

@@ -239,8 +257,10 @@ class StreamFacadeTest(unittest.TestCase):

  def test_read_records_incremental(self):
  expected_stream_data = [{"data": 1}, {"data": 2}]
- records = [Record(data) for data in expected_stream_data]
- self._abstract_stream.read.return_value = records
+ records = [Record(data, "stream") for data in expected_stream_data]
+ partition = Mock()
+ partition.read.return_value = records
+ self._abstract_stream.generate_partitions.return_value = [partition]

  actual_stream_data = list(self._facade.read_incremental(None, None, None, None, None, None, None))

@@ -252,7 +272,7 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = "id"
  stream.cursor_field = "cursor"

- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)

  assert facade.name == "stream"
  assert facade.cursor_field == "cursor"
@@ -264,8 +284,8 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = None
  stream.cursor_field = []

- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
- facade._abstract_stream._primary_key is None
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
+ assert facade._abstract_stream._primary_key == []

  def test_create_from_stream_with_composite_primary_key(self):
  stream = Mock()
@@ -273,15 +293,15 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = ["id", "name"]
  stream.cursor_field = []

- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
- facade._abstract_stream._primary_key == ["id", "name"]
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
+ assert facade._abstract_stream._primary_key == ["id", "name"]

  def test_create_from_stream_with_empty_list_cursor(self):
  stream = Mock()
  stream.primary_key = "id"
  stream.cursor_field = []

- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)

  assert facade.cursor_field == []

@@ -291,7 +311,7 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = [["field", "id"]]

  with self.assertRaises(ValueError):
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)

  def test_create_from_stream_raises_exception_if_primary_key_has_invalid_type(self):
  stream = Mock()
@@ -299,7 +319,7 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = 123

  with self.assertRaises(ValueError):
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)

  def test_create_from_stream_raises_exception_if_cursor_field_is_nested(self):
  stream = Mock()
@@ -308,7 +328,7 @@ class StreamFacadeTest(unittest.TestCase):
  stream.cursor_field = ["field", "cursor"]

  with self.assertRaises(ValueError):
- StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)

  def test_create_from_stream_with_cursor_field_as_list(self):
  stream = Mock()
@@ -316,7 +336,7 @@ class StreamFacadeTest(unittest.TestCase):
  stream.primary_key = "id"
  stream.cursor_field = ["cursor"]

- facade = StreamFacade.create_from_stream(stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ facade = StreamFacade.create_from_stream(stream, self._source, self._logger, _ANY_STATE, self._cursor)
  assert facade.cursor_field == "cursor"

  def test_create_from_stream_none_message_repository(self):
@@ -326,12 +346,12 @@ class StreamFacadeTest(unittest.TestCase):
  self._source.message_repository = None

  with self.assertRaises(ValueError):
- StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, {}, self._cursor)
+ StreamFacade.create_from_stream(self._stream, self._source, self._logger, {}, self._cursor)

  def test_get_error_display_message_no_display_message(self):
  self._stream.get_error_display_message.return_value = "display_message"

- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)

  expected_display_message = None
  e = Exception()
@@ -343,7 +363,7 @@ class StreamFacadeTest(unittest.TestCase):
  def test_get_error_display_message_with_display_message(self):
  self._stream.get_error_display_message.return_value = "display_message"

- facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, self._max_workers, _ANY_STATE, self._cursor)
+ facade = StreamFacade.create_from_stream(self._stream, self._source, self._logger, _ANY_STATE, self._cursor)

  expected_display_message = "display_message"
  e = ExceptionWithDisplayMessage("display_message")
@@ -364,7 +384,7 @@ def test_get_error_display_message(exception, expected_display_message):
  stream = Mock()
  legacy_stream = Mock()
  cursor = Mock(spec=Cursor)
- facade = StreamFacade(stream, legacy_stream, cursor)
+ facade = StreamFacade(stream, legacy_stream, cursor, Mock().Mock(), Mock())

  display_message = facade.get_error_display_message(exception)
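Finally, `StreamFacade.create_from_stream` loses its `max_workers` parameter at every call site, consistent with concurrency moving into `ConcurrentSource`, and the `StreamFacade` constructor widens to five arguments (`abstract_stream, legacy_stream, cursor, slice_logger, logger`). A sketch of the updated call, with mocks standing in for a real stream and source as the tests above do:

```python
import logging
from unittest.mock import Mock

from airbyte_cdk.sources.message import InMemoryMessageRepository
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor

logger = logging.getLogger("airbyte")

stream = Mock()
stream.name = "stream"
stream.primary_key = "id"
stream.cursor_field = []

source = Mock()
source.message_repository = InMemoryMessageRepository()  # required; None raises ValueError per the tests

# 0.54.0: create_from_stream(stream, source, logger, max_workers, state, cursor)
# 0.55.0: max_workers is gone
facade = StreamFacade.create_from_stream(stream, source, logger, {}, NoopCursor())
```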