airbyte-cdk 0.54.0__py3-none-any.whl → 0.55.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
  3. airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
  4. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
  5. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
  6. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
  7. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
  8. airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
  10. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
  12. airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
  13. airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
  14. airbyte_cdk/sources/utils/slice_logger.py +5 -0
  15. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +35 -23
  17. unit_tests/sources/concurrent_source/__init__.py +3 -0
  18. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
  19. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
  23. unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
  24. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
  25. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
  26. unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
  27. unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
  28. unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
  29. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
  30. unit_tests/sources/streams/test_stream_read.py +1 -2
  31. unit_tests/sources/test_concurrent_source.py +105 -0
  32. unit_tests/sources/test_source_read.py +461 -0
  33. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
  34. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,9 @@ from unittest.mock import Mock
7
7
 
8
8
  import pytest
9
9
  from airbyte_cdk.models import SyncMode
10
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
10
11
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamPartition
11
12
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
12
- from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_GENERATED_SENTINEL
13
13
 
14
14
 
15
15
  @pytest.mark.parametrize(
@@ -17,21 +17,22 @@ from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_G
17
17
  )
18
18
  def test_partition_generator(slices):
19
19
  queue = Queue()
20
- partition_generator = PartitionEnqueuer(queue, PARTITIONS_GENERATED_SENTINEL)
20
+ partition_generator = PartitionEnqueuer(queue)
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
24
  sync_mode = SyncMode.full_refresh
25
25
  cursor_field = None
26
26
  state = None
27
- partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
- stream.generate.return_value = iter(partitions)
27
+ cursor = Mock()
28
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state, cursor) for s in slices]
29
+ stream.generate_partitions.return_value = iter(partitions)
29
30
 
30
31
  partition_generator.generate_partitions(stream)
31
32
 
32
33
  actual_partitions = []
33
34
  while partition := queue.get(False):
34
- if partition == PARTITIONS_GENERATED_SENTINEL:
35
+ if isinstance(partition, PartitionGenerationCompletedSentinel):
35
36
  break
36
37
  actual_partitions.append(partition)
37
38
 
@@ -0,0 +1,604 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import logging
5
+ import unittest
6
+ from unittest.mock import Mock
7
+
8
+ import freezegun
9
+ from airbyte_cdk.models import (
10
+ AirbyteLogMessage,
11
+ AirbyteMessage,
12
+ AirbyteRecordMessage,
13
+ AirbyteStream,
14
+ AirbyteStreamStatus,
15
+ AirbyteStreamStatusTraceMessage,
16
+ AirbyteTraceMessage,
17
+ )
18
+ from airbyte_cdk.models import Level as LogLevel
19
+ from airbyte_cdk.models import StreamDescriptor, SyncMode, TraceType
20
+ from airbyte_cdk.models import Type as MessageType
21
+ from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
22
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
23
+ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
24
+ from airbyte_cdk.sources.message import LogMessage, MessageRepository
25
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
26
+ from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
27
+ from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
28
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
29
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
30
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
31
+ from airbyte_cdk.sources.utils.slice_logger import SliceLogger
32
+
33
+ _STREAM_NAME = "stream"
34
+ _ANOTHER_STREAM_NAME = "stream2"
35
+
36
+
37
+ class TestConcurrentReadProcessor(unittest.TestCase):
38
+ def setUp(self):
39
+ self._partition_enqueuer = Mock(spec=PartitionEnqueuer)
40
+ self._thread_pool_manager = Mock(spec=ThreadPoolManager)
41
+
42
+ self._an_open_partition = Mock(spec=Partition)
43
+ self._an_open_partition.is_closed.return_value = False
44
+ self._log_message = Mock(spec=LogMessage)
45
+ self._an_open_partition.to_slice.return_value = self._log_message
46
+ self._an_open_partition.stream_name.return_value = _STREAM_NAME
47
+
48
+ self._a_closed_partition = Mock(spec=Partition)
49
+ self._a_closed_partition.is_closed.return_value = True
50
+ self._a_closed_partition.stream_name.return_value = _ANOTHER_STREAM_NAME
51
+
52
+ self._logger = Mock(spec=logging.Logger)
53
+ self._slice_logger = Mock(spec=SliceLogger)
54
+ self._slice_logger.create_slice_log_message.return_value = self._log_message
55
+ self._message_repository = Mock(spec=MessageRepository)
56
+ self._message_repository.consume_queue.return_value = []
57
+ self._partition_reader = Mock(spec=PartitionReader)
58
+
59
+ self._stream = Mock(spec=AbstractStream)
60
+ self._stream.name = _STREAM_NAME
61
+ self._stream.as_airbyte_stream.return_value = AirbyteStream(
62
+ name=_STREAM_NAME,
63
+ json_schema={},
64
+ supported_sync_modes=[SyncMode.full_refresh],
65
+ )
66
+ self._another_stream = Mock(spec=AbstractStream)
67
+ self._another_stream.name = _ANOTHER_STREAM_NAME
68
+ self._another_stream.as_airbyte_stream.return_value = AirbyteStream(
69
+ name=_ANOTHER_STREAM_NAME,
70
+ json_schema={},
71
+ supported_sync_modes=[SyncMode.full_refresh],
72
+ )
73
+
74
+ self._record_data = {"id": 1, "value": "A"}
75
+ self._record = Mock(spec=Record)
76
+ self._record.stream_name = _STREAM_NAME
77
+ self._record.data = self._record_data
78
+
79
+ def test_handle_partition_done_no_other_streams_to_generate_partitions_for(self):
80
+ stream_instances_to_read_from = [self._stream]
81
+
82
+ handler = ConcurrentReadProcessor(
83
+ stream_instances_to_read_from,
84
+ self._partition_enqueuer,
85
+ self._thread_pool_manager,
86
+ self._logger,
87
+ self._slice_logger,
88
+ self._message_repository,
89
+ self._partition_reader,
90
+ )
91
+ handler.start_next_partition_generator()
92
+ handler.on_partition(self._an_open_partition)
93
+
94
+ sentinel = PartitionGenerationCompletedSentinel(self._stream)
95
+ messages = list(handler.on_partition_generation_completed(sentinel))
96
+
97
+ expected_messages = []
98
+ assert expected_messages == messages
99
+
100
+ @freezegun.freeze_time("2020-01-01T00:00:00")
101
+ def test_handle_last_stream_partition_done(self):
102
+ stream_instances_to_read_from = [self._another_stream]
103
+
104
+ handler = ConcurrentReadProcessor(
105
+ stream_instances_to_read_from,
106
+ self._partition_enqueuer,
107
+ self._thread_pool_manager,
108
+ self._logger,
109
+ self._slice_logger,
110
+ self._message_repository,
111
+ self._partition_reader,
112
+ )
113
+ handler.start_next_partition_generator()
114
+ handler.on_partition(self._a_closed_partition)
115
+
116
+ sentinel = PartitionGenerationCompletedSentinel(self._another_stream)
117
+ messages = handler.on_partition_generation_completed(sentinel)
118
+
119
+ expected_messages = [
120
+ AirbyteMessage(
121
+ type=MessageType.TRACE,
122
+ trace=AirbyteTraceMessage(
123
+ type=TraceType.STREAM_STATUS,
124
+ emitted_at=1577836800000.0,
125
+ stream_status=AirbyteStreamStatusTraceMessage(
126
+ stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME),
127
+ status=AirbyteStreamStatus(AirbyteStreamStatus.COMPLETE),
128
+ ),
129
+ ),
130
+ )
131
+ ]
132
+ assert expected_messages == messages
133
+
134
+ def test_handle_partition(self):
135
+ stream_instances_to_read_from = [self._another_stream]
136
+
137
+ handler = ConcurrentReadProcessor(
138
+ stream_instances_to_read_from,
139
+ self._partition_enqueuer,
140
+ self._thread_pool_manager,
141
+ self._logger,
142
+ self._slice_logger,
143
+ self._message_repository,
144
+ self._partition_reader,
145
+ )
146
+
147
+ handler.on_partition(self._a_closed_partition)
148
+
149
+ self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._a_closed_partition)
150
+ assert self._a_closed_partition in handler._streams_to_partitions[_ANOTHER_STREAM_NAME]
151
+
152
+ def test_handle_partition_emits_log_message_if_it_should_be_logged(self):
153
+ stream_instances_to_read_from = [self._stream]
154
+ self._slice_logger = Mock(spec=SliceLogger)
155
+ self._slice_logger.should_log_slice_message.return_value = True
156
+ self._slice_logger.create_slice_log_message.return_value = self._log_message
157
+
158
+ handler = ConcurrentReadProcessor(
159
+ stream_instances_to_read_from,
160
+ self._partition_enqueuer,
161
+ self._thread_pool_manager,
162
+ self._logger,
163
+ self._slice_logger,
164
+ self._message_repository,
165
+ self._partition_reader,
166
+ )
167
+
168
+ handler.on_partition(self._an_open_partition)
169
+
170
+ self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._an_open_partition)
171
+ self._message_repository.emit_message.assert_called_with(self._log_message)
172
+ assert self._an_open_partition in handler._streams_to_partitions[_STREAM_NAME]
173
+
174
+ def test_handle_on_partition_complete_sentinel_with_messages_from_repository(self):
175
+ stream_instances_to_read_from = [self._stream]
176
+ partition = Mock(spec=Partition)
177
+ log_message = Mock(spec=LogMessage)
178
+ partition.to_slice.return_value = log_message
179
+ partition.stream_name.return_value = _STREAM_NAME
180
+ partition.is_closed.return_value = True
181
+
182
+ handler = ConcurrentReadProcessor(
183
+ stream_instances_to_read_from,
184
+ self._partition_enqueuer,
185
+ self._thread_pool_manager,
186
+ self._logger,
187
+ self._slice_logger,
188
+ self._message_repository,
189
+ self._partition_reader,
190
+ )
191
+ handler.start_next_partition_generator()
192
+
193
+ sentinel = PartitionCompleteSentinel(partition)
194
+
195
+ self._message_repository.consume_queue.return_value = [
196
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
197
+ ]
198
+
199
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
200
+
201
+ expected_messages = [
202
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
203
+ ]
204
+ assert expected_messages == messages
205
+
206
+ partition.close.assert_called_once()
207
+
208
+ @freezegun.freeze_time("2020-01-01T00:00:00")
209
+ def test_handle_on_partition_complete_sentinel_yields_status_message_if_the_stream_is_done(self):
210
+ self._streams_currently_generating_partitions = [self._another_stream]
211
+ stream_instances_to_read_from = [self._another_stream]
212
+ log_message = Mock(spec=LogMessage)
213
+ self._a_closed_partition.to_slice.return_value = log_message
214
+ self._message_repository.consume_queue.return_value = []
215
+
216
+ handler = ConcurrentReadProcessor(
217
+ stream_instances_to_read_from,
218
+ self._partition_enqueuer,
219
+ self._thread_pool_manager,
220
+ self._logger,
221
+ self._slice_logger,
222
+ self._message_repository,
223
+ self._partition_reader,
224
+ )
225
+ handler.start_next_partition_generator()
226
+ handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._another_stream))
227
+
228
+ sentinel = PartitionCompleteSentinel(self._a_closed_partition)
229
+
230
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
231
+
232
+ expected_messages = [
233
+ AirbyteMessage(
234
+ type=MessageType.TRACE,
235
+ trace=AirbyteTraceMessage(
236
+ type=TraceType.STREAM_STATUS,
237
+ stream_status=AirbyteStreamStatusTraceMessage(
238
+ stream_descriptor=StreamDescriptor(
239
+ name=_ANOTHER_STREAM_NAME,
240
+ ),
241
+ status=AirbyteStreamStatus.COMPLETE,
242
+ ),
243
+ emitted_at=1577836800000.0,
244
+ ),
245
+ )
246
+ ]
247
+ assert expected_messages == messages
248
+ self._a_closed_partition.close.assert_called_once()
249
+
250
+ @freezegun.freeze_time("2020-01-01T00:00:00")
251
+ def test_handle_on_partition_complete_sentinel_yields_no_status_message_if_the_stream_is_not_done(self):
252
+ stream_instances_to_read_from = [self._stream]
253
+ partition = Mock(spec=Partition)
254
+ log_message = Mock(spec=LogMessage)
255
+ partition.to_slice.return_value = log_message
256
+ partition.stream_name.return_value = _STREAM_NAME
257
+ partition.is_closed.return_value = True
258
+
259
+ handler = ConcurrentReadProcessor(
260
+ stream_instances_to_read_from,
261
+ self._partition_enqueuer,
262
+ self._thread_pool_manager,
263
+ self._logger,
264
+ self._slice_logger,
265
+ self._message_repository,
266
+ self._partition_reader,
267
+ )
268
+ handler.start_next_partition_generator()
269
+
270
+ sentinel = PartitionCompleteSentinel(partition)
271
+
272
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
273
+
274
+ expected_messages = []
275
+ assert expected_messages == messages
276
+ partition.close.assert_called_once()
277
+
278
+ @freezegun.freeze_time("2020-01-01T00:00:00")
279
+ def test_on_record_no_status_message_no_repository_messge(self):
280
+ stream_instances_to_read_from = [self._stream]
281
+ partition = Mock(spec=Partition)
282
+ log_message = Mock(spec=LogMessage)
283
+ partition.to_slice.return_value = log_message
284
+ partition.stream_name.return_value = _STREAM_NAME
285
+ partition.is_closed.return_value = True
286
+ self._message_repository.consume_queue.return_value = []
287
+
288
+ handler = ConcurrentReadProcessor(
289
+ stream_instances_to_read_from,
290
+ self._partition_enqueuer,
291
+ self._thread_pool_manager,
292
+ self._logger,
293
+ self._slice_logger,
294
+ self._message_repository,
295
+ self._partition_reader,
296
+ )
297
+
298
+ # Simulate a first record
299
+ list(handler.on_record(self._record))
300
+
301
+ messages = list(handler.on_record(self._record))
302
+
303
+ expected_messages = [
304
+ AirbyteMessage(
305
+ type=MessageType.RECORD,
306
+ record=AirbyteRecordMessage(
307
+ stream=_STREAM_NAME,
308
+ data=self._record_data,
309
+ emitted_at=1577836800000,
310
+ ),
311
+ )
312
+ ]
313
+ assert expected_messages == messages
314
+
315
+ @freezegun.freeze_time("2020-01-01T00:00:00")
316
+ def test_on_record_with_repository_messge(self):
317
+ stream_instances_to_read_from = [self._stream]
318
+ partition = Mock(spec=Partition)
319
+ log_message = Mock(spec=LogMessage)
320
+ partition.to_slice.return_value = log_message
321
+ partition.stream_name.return_value = _STREAM_NAME
322
+ partition.is_closed.return_value = True
323
+ slice_logger = Mock(spec=SliceLogger)
324
+ slice_logger.should_log_slice_message.return_value = True
325
+ slice_logger.create_slice_log_message.return_value = log_message
326
+ self._message_repository.consume_queue.return_value = [
327
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
328
+ ]
329
+
330
+ handler = ConcurrentReadProcessor(
331
+ stream_instances_to_read_from,
332
+ self._partition_enqueuer,
333
+ self._thread_pool_manager,
334
+ self._logger,
335
+ self._slice_logger,
336
+ self._message_repository,
337
+ self._partition_reader,
338
+ )
339
+
340
+ stream = Mock(spec=AbstractStream)
341
+ stream.name = _STREAM_NAME
342
+ stream.as_airbyte_stream.return_value = AirbyteStream(
343
+ name=_STREAM_NAME,
344
+ json_schema={},
345
+ supported_sync_modes=[SyncMode.full_refresh],
346
+ )
347
+
348
+ # Simulate a first record
349
+ list(handler.on_record(self._record))
350
+
351
+ messages = list(handler.on_record(self._record))
352
+
353
+ expected_messages = [
354
+ AirbyteMessage(
355
+ type=MessageType.RECORD,
356
+ record=AirbyteRecordMessage(
357
+ stream=_STREAM_NAME,
358
+ data=self._record_data,
359
+ emitted_at=1577836800000,
360
+ ),
361
+ ),
362
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
363
+ ]
364
+ assert expected_messages == messages
365
+ assert handler._record_counter[_STREAM_NAME] == 2
366
+
367
+ @freezegun.freeze_time("2020-01-01T00:00:00")
368
+ def test_on_record_emits_status_message_on_first_record_no_repository_message(self):
369
+ self._streams_currently_generating_partitions = [_STREAM_NAME]
370
+ stream_instances_to_read_from = [self._stream]
371
+ partition = Mock(spec=Partition)
372
+ partition.stream_name.return_value = _STREAM_NAME
373
+ partition.is_closed.return_value = True
374
+
375
+ handler = ConcurrentReadProcessor(
376
+ stream_instances_to_read_from,
377
+ self._partition_enqueuer,
378
+ self._thread_pool_manager,
379
+ self._logger,
380
+ self._slice_logger,
381
+ self._message_repository,
382
+ self._partition_reader,
383
+ )
384
+
385
+ messages = list(handler.on_record(self._record))
386
+
387
+ expected_messages = [
388
+ AirbyteMessage(
389
+ type=MessageType.TRACE,
390
+ trace=AirbyteTraceMessage(
391
+ type=TraceType.STREAM_STATUS,
392
+ emitted_at=1577836800000.0,
393
+ stream_status=AirbyteStreamStatusTraceMessage(
394
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
395
+ ),
396
+ ),
397
+ ),
398
+ AirbyteMessage(
399
+ type=MessageType.RECORD,
400
+ record=AirbyteRecordMessage(
401
+ stream=_STREAM_NAME,
402
+ data=self._record_data,
403
+ emitted_at=1577836800000,
404
+ ),
405
+ ),
406
+ ]
407
+ assert expected_messages == messages
408
+
409
+ @freezegun.freeze_time("2020-01-01T00:00:00")
410
+ def test_on_record_emits_status_message_on_first_record_with_repository_message(self):
411
+ stream_instances_to_read_from = [self._stream]
412
+ partition = Mock(spec=Partition)
413
+ log_message = Mock(spec=LogMessage)
414
+ partition.to_slice.return_value = log_message
415
+ partition.stream_name.return_value = _STREAM_NAME
416
+ partition.is_closed.return_value = True
417
+ self._message_repository.consume_queue.return_value = [
418
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
419
+ ]
420
+
421
+ handler = ConcurrentReadProcessor(
422
+ stream_instances_to_read_from,
423
+ self._partition_enqueuer,
424
+ self._thread_pool_manager,
425
+ self._logger,
426
+ self._slice_logger,
427
+ self._message_repository,
428
+ self._partition_reader,
429
+ )
430
+
431
+ stream = Mock(spec=AbstractStream)
432
+ stream.name = _STREAM_NAME
433
+ stream.as_airbyte_stream.return_value = AirbyteStream(
434
+ name=_STREAM_NAME,
435
+ json_schema={},
436
+ supported_sync_modes=[SyncMode.full_refresh],
437
+ )
438
+
439
+ messages = list(handler.on_record(self._record))
440
+
441
+ expected_messages = [
442
+ AirbyteMessage(
443
+ type=MessageType.TRACE,
444
+ trace=AirbyteTraceMessage(
445
+ type=TraceType.STREAM_STATUS,
446
+ emitted_at=1577836800000.0,
447
+ stream_status=AirbyteStreamStatusTraceMessage(
448
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
449
+ ),
450
+ ),
451
+ ),
452
+ AirbyteMessage(
453
+ type=MessageType.RECORD,
454
+ record=AirbyteRecordMessage(
455
+ stream=_STREAM_NAME,
456
+ data=self._record_data,
457
+ emitted_at=1577836800000,
458
+ ),
459
+ ),
460
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
461
+ ]
462
+ assert expected_messages == messages
463
+
464
+ @freezegun.freeze_time("2020-01-01T00:00:00")
465
+ def test_on_exception_stops_streams_and_raises_an_exception(self):
466
+ stream_instances_to_read_from = [self._stream, self._another_stream]
467
+
468
+ handler = ConcurrentReadProcessor(
469
+ stream_instances_to_read_from,
470
+ self._partition_enqueuer,
471
+ self._thread_pool_manager,
472
+ self._logger,
473
+ self._slice_logger,
474
+ self._message_repository,
475
+ self._partition_reader,
476
+ )
477
+ handler._streams_to_partitions = {_STREAM_NAME: {self._an_open_partition}, _ANOTHER_STREAM_NAME: {self._a_closed_partition}}
478
+
479
+ another_stream = Mock(spec=AbstractStream)
480
+ another_stream.name = _STREAM_NAME
481
+ another_stream.as_airbyte_stream.return_value = AirbyteStream(
482
+ name=_ANOTHER_STREAM_NAME,
483
+ json_schema={},
484
+ supported_sync_modes=[SyncMode.full_refresh],
485
+ )
486
+
487
+ exception = RuntimeError("Something went wrong")
488
+
489
+ messages = []
490
+
491
+ with self.assertRaises(RuntimeError):
492
+ for m in handler.on_exception(exception):
493
+ messages.append(m)
494
+
495
+ expected_message = [
496
+ AirbyteMessage(
497
+ type=MessageType.TRACE,
498
+ trace=AirbyteTraceMessage(
499
+ type=TraceType.STREAM_STATUS,
500
+ emitted_at=1577836800000.0,
501
+ stream_status=AirbyteStreamStatusTraceMessage(
502
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE)
503
+ ),
504
+ ),
505
+ )
506
+ ]
507
+
508
+ assert messages == expected_message
509
+ self._thread_pool_manager.shutdown.assert_called_once()
510
+
511
+ def test_is_done_is_false_if_there_are_any_instances_to_read_from(self):
512
+ stream_instances_to_read_from = [self._stream]
513
+
514
+ handler = ConcurrentReadProcessor(
515
+ stream_instances_to_read_from,
516
+ self._partition_enqueuer,
517
+ self._thread_pool_manager,
518
+ self._logger,
519
+ self._slice_logger,
520
+ self._message_repository,
521
+ self._partition_reader,
522
+ )
523
+
524
+ assert not handler.is_done()
525
+
526
+ def test_is_done_is_false_if_there_are_streams_still_generating_partitions(self):
527
+ stream_instances_to_read_from = [self._stream]
528
+
529
+ handler = ConcurrentReadProcessor(
530
+ stream_instances_to_read_from,
531
+ self._partition_enqueuer,
532
+ self._thread_pool_manager,
533
+ self._logger,
534
+ self._slice_logger,
535
+ self._message_repository,
536
+ self._partition_reader,
537
+ )
538
+
539
+ handler.start_next_partition_generator()
540
+
541
+ assert not handler.is_done()
542
+
543
+ def test_is_done_is_false_if_all_partitions_are_not_closed(self):
544
+ stream_instances_to_read_from = [self._stream]
545
+
546
+ handler = ConcurrentReadProcessor(
547
+ stream_instances_to_read_from,
548
+ self._partition_enqueuer,
549
+ self._thread_pool_manager,
550
+ self._logger,
551
+ self._slice_logger,
552
+ self._message_repository,
553
+ self._partition_reader,
554
+ )
555
+
556
+ handler.start_next_partition_generator()
557
+ handler.on_partition(self._an_open_partition)
558
+ handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._stream))
559
+
560
+ assert not handler.is_done()
561
+
562
+ def test_is_done_is_true_if_all_partitions_are_closed_and_no_streams_are_generating_partitions_and_none_are_still_to_run(self):
563
+ stream_instances_to_read_from = []
564
+
565
+ handler = ConcurrentReadProcessor(
566
+ stream_instances_to_read_from,
567
+ self._partition_enqueuer,
568
+ self._thread_pool_manager,
569
+ self._logger,
570
+ self._slice_logger,
571
+ self._message_repository,
572
+ self._partition_reader,
573
+ )
574
+
575
+ assert handler.is_done()
576
+
577
+ @freezegun.freeze_time("2020-01-01T00:00:00")
578
+ def test_start_next_partition_generator(self):
579
+ stream_instances_to_read_from = [self._stream]
580
+ handler = ConcurrentReadProcessor(
581
+ stream_instances_to_read_from,
582
+ self._partition_enqueuer,
583
+ self._thread_pool_manager,
584
+ self._logger,
585
+ self._slice_logger,
586
+ self._message_repository,
587
+ self._partition_reader,
588
+ )
589
+
590
+ status_message = handler.start_next_partition_generator()
591
+
592
+ assert status_message == AirbyteMessage(
593
+ type=MessageType.TRACE,
594
+ trace=AirbyteTraceMessage(
595
+ type=TraceType.STREAM_STATUS,
596
+ emitted_at=1577836800000.0,
597
+ stream_status=AirbyteStreamStatusTraceMessage(
598
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.STARTED)
599
+ ),
600
+ ),
601
+ )
602
+
603
+ assert _STREAM_NAME in handler._streams_currently_generating_partitions
604
+ self._thread_pool_manager.submit.assert_called_with(self._partition_enqueuer.generate_partitions, self._stream)
@@ -33,7 +33,7 @@ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
33
33
 
34
34
 
35
35
  def _record(cursor_value: Comparable) -> Record:
36
- return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
36
+ return Record(data={_A_CURSOR_FIELD_KEY: cursor_value}, stream_name=_A_STREAM_NAME)
37
37
 
38
38
 
39
39
  class ConcurrentCursorTest(TestCase):