airbyte-cdk: airbyte_cdk-0.54.0-py3-none-any.whl → airbyte_cdk-0.55.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36)
  1. airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
  2. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
  3. airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
  4. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
  5. airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
  6. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
  7. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
  8. airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
  10. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
  11. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
  12. airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
  13. airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
  14. airbyte_cdk/sources/utils/slice_logger.py +5 -0
  15. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
  16. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +35 -23
  17. unit_tests/sources/concurrent_source/__init__.py +3 -0
  18. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
  19. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
  23. unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
  24. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
  25. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
  26. unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
  27. unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
  28. unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
  29. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
  30. unit_tests/sources/streams/test_stream_read.py +1 -2
  31. unit_tests/sources/test_concurrent_source.py +105 -0
  32. unit_tests/sources/test_source_read.py +461 -0
  33. airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
  34. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-0.54.0.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,9 @@ from unittest.mock import Mock
7
7
 
8
8
  import pytest
9
9
  from airbyte_cdk.models import SyncMode
10
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
10
11
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamPartition
11
12
  from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
12
- from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_GENERATED_SENTINEL
13
13
 
14
14
 
15
15
  @pytest.mark.parametrize(
@@ -17,21 +17,22 @@ from airbyte_cdk.sources.streams.concurrent.partitions.types import PARTITIONS_G
17
17
  )
18
18
  def test_partition_generator(slices):
19
19
  queue = Queue()
20
- partition_generator = PartitionEnqueuer(queue, PARTITIONS_GENERATED_SENTINEL)
20
+ partition_generator = PartitionEnqueuer(queue)
21
21
 
22
22
  stream = Mock()
23
23
  message_repository = Mock()
24
24
  sync_mode = SyncMode.full_refresh
25
25
  cursor_field = None
26
26
  state = None
27
- partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state) for s in slices]
28
- stream.generate.return_value = iter(partitions)
27
+ cursor = Mock()
28
+ partitions = [StreamPartition(stream, s, message_repository, sync_mode, cursor_field, state, cursor) for s in slices]
29
+ stream.generate_partitions.return_value = iter(partitions)
29
30
 
30
31
  partition_generator.generate_partitions(stream)
31
32
 
32
33
  actual_partitions = []
33
34
  while partition := queue.get(False):
34
- if partition == PARTITIONS_GENERATED_SENTINEL:
35
+ if isinstance(partition, PartitionGenerationCompletedSentinel):
35
36
  break
36
37
  actual_partitions.append(partition)
37
38
 
@@ -0,0 +1,604 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+ import logging
5
+ import unittest
6
+ from unittest.mock import Mock
7
+
8
+ import freezegun
9
+ from airbyte_cdk.models import (
10
+ AirbyteLogMessage,
11
+ AirbyteMessage,
12
+ AirbyteRecordMessage,
13
+ AirbyteStream,
14
+ AirbyteStreamStatus,
15
+ AirbyteStreamStatusTraceMessage,
16
+ AirbyteTraceMessage,
17
+ )
18
+ from airbyte_cdk.models import Level as LogLevel
19
+ from airbyte_cdk.models import StreamDescriptor, SyncMode, TraceType
20
+ from airbyte_cdk.models import Type as MessageType
21
+ from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
22
+ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
23
+ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
24
+ from airbyte_cdk.sources.message import LogMessage, MessageRepository
25
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
26
+ from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
27
+ from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
28
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
29
+ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
30
+ from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
31
+ from airbyte_cdk.sources.utils.slice_logger import SliceLogger
32
+
33
+ _STREAM_NAME = "stream"
34
+ _ANOTHER_STREAM_NAME = "stream2"
35
+
36
+
37
+ class TestConcurrentReadProcessor(unittest.TestCase):
38
+ def setUp(self):
39
+ self._partition_enqueuer = Mock(spec=PartitionEnqueuer)
40
+ self._thread_pool_manager = Mock(spec=ThreadPoolManager)
41
+
42
+ self._an_open_partition = Mock(spec=Partition)
43
+ self._an_open_partition.is_closed.return_value = False
44
+ self._log_message = Mock(spec=LogMessage)
45
+ self._an_open_partition.to_slice.return_value = self._log_message
46
+ self._an_open_partition.stream_name.return_value = _STREAM_NAME
47
+
48
+ self._a_closed_partition = Mock(spec=Partition)
49
+ self._a_closed_partition.is_closed.return_value = True
50
+ self._a_closed_partition.stream_name.return_value = _ANOTHER_STREAM_NAME
51
+
52
+ self._logger = Mock(spec=logging.Logger)
53
+ self._slice_logger = Mock(spec=SliceLogger)
54
+ self._slice_logger.create_slice_log_message.return_value = self._log_message
55
+ self._message_repository = Mock(spec=MessageRepository)
56
+ self._message_repository.consume_queue.return_value = []
57
+ self._partition_reader = Mock(spec=PartitionReader)
58
+
59
+ self._stream = Mock(spec=AbstractStream)
60
+ self._stream.name = _STREAM_NAME
61
+ self._stream.as_airbyte_stream.return_value = AirbyteStream(
62
+ name=_STREAM_NAME,
63
+ json_schema={},
64
+ supported_sync_modes=[SyncMode.full_refresh],
65
+ )
66
+ self._another_stream = Mock(spec=AbstractStream)
67
+ self._another_stream.name = _ANOTHER_STREAM_NAME
68
+ self._another_stream.as_airbyte_stream.return_value = AirbyteStream(
69
+ name=_ANOTHER_STREAM_NAME,
70
+ json_schema={},
71
+ supported_sync_modes=[SyncMode.full_refresh],
72
+ )
73
+
74
+ self._record_data = {"id": 1, "value": "A"}
75
+ self._record = Mock(spec=Record)
76
+ self._record.stream_name = _STREAM_NAME
77
+ self._record.data = self._record_data
78
+
79
+ def test_handle_partition_done_no_other_streams_to_generate_partitions_for(self):
80
+ stream_instances_to_read_from = [self._stream]
81
+
82
+ handler = ConcurrentReadProcessor(
83
+ stream_instances_to_read_from,
84
+ self._partition_enqueuer,
85
+ self._thread_pool_manager,
86
+ self._logger,
87
+ self._slice_logger,
88
+ self._message_repository,
89
+ self._partition_reader,
90
+ )
91
+ handler.start_next_partition_generator()
92
+ handler.on_partition(self._an_open_partition)
93
+
94
+ sentinel = PartitionGenerationCompletedSentinel(self._stream)
95
+ messages = list(handler.on_partition_generation_completed(sentinel))
96
+
97
+ expected_messages = []
98
+ assert expected_messages == messages
99
+
100
+ @freezegun.freeze_time("2020-01-01T00:00:00")
101
+ def test_handle_last_stream_partition_done(self):
102
+ stream_instances_to_read_from = [self._another_stream]
103
+
104
+ handler = ConcurrentReadProcessor(
105
+ stream_instances_to_read_from,
106
+ self._partition_enqueuer,
107
+ self._thread_pool_manager,
108
+ self._logger,
109
+ self._slice_logger,
110
+ self._message_repository,
111
+ self._partition_reader,
112
+ )
113
+ handler.start_next_partition_generator()
114
+ handler.on_partition(self._a_closed_partition)
115
+
116
+ sentinel = PartitionGenerationCompletedSentinel(self._another_stream)
117
+ messages = handler.on_partition_generation_completed(sentinel)
118
+
119
+ expected_messages = [
120
+ AirbyteMessage(
121
+ type=MessageType.TRACE,
122
+ trace=AirbyteTraceMessage(
123
+ type=TraceType.STREAM_STATUS,
124
+ emitted_at=1577836800000.0,
125
+ stream_status=AirbyteStreamStatusTraceMessage(
126
+ stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME),
127
+ status=AirbyteStreamStatus(AirbyteStreamStatus.COMPLETE),
128
+ ),
129
+ ),
130
+ )
131
+ ]
132
+ assert expected_messages == messages
133
+
134
+ def test_handle_partition(self):
135
+ stream_instances_to_read_from = [self._another_stream]
136
+
137
+ handler = ConcurrentReadProcessor(
138
+ stream_instances_to_read_from,
139
+ self._partition_enqueuer,
140
+ self._thread_pool_manager,
141
+ self._logger,
142
+ self._slice_logger,
143
+ self._message_repository,
144
+ self._partition_reader,
145
+ )
146
+
147
+ handler.on_partition(self._a_closed_partition)
148
+
149
+ self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._a_closed_partition)
150
+ assert self._a_closed_partition in handler._streams_to_partitions[_ANOTHER_STREAM_NAME]
151
+
152
+ def test_handle_partition_emits_log_message_if_it_should_be_logged(self):
153
+ stream_instances_to_read_from = [self._stream]
154
+ self._slice_logger = Mock(spec=SliceLogger)
155
+ self._slice_logger.should_log_slice_message.return_value = True
156
+ self._slice_logger.create_slice_log_message.return_value = self._log_message
157
+
158
+ handler = ConcurrentReadProcessor(
159
+ stream_instances_to_read_from,
160
+ self._partition_enqueuer,
161
+ self._thread_pool_manager,
162
+ self._logger,
163
+ self._slice_logger,
164
+ self._message_repository,
165
+ self._partition_reader,
166
+ )
167
+
168
+ handler.on_partition(self._an_open_partition)
169
+
170
+ self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._an_open_partition)
171
+ self._message_repository.emit_message.assert_called_with(self._log_message)
172
+ assert self._an_open_partition in handler._streams_to_partitions[_STREAM_NAME]
173
+
174
+ def test_handle_on_partition_complete_sentinel_with_messages_from_repository(self):
175
+ stream_instances_to_read_from = [self._stream]
176
+ partition = Mock(spec=Partition)
177
+ log_message = Mock(spec=LogMessage)
178
+ partition.to_slice.return_value = log_message
179
+ partition.stream_name.return_value = _STREAM_NAME
180
+ partition.is_closed.return_value = True
181
+
182
+ handler = ConcurrentReadProcessor(
183
+ stream_instances_to_read_from,
184
+ self._partition_enqueuer,
185
+ self._thread_pool_manager,
186
+ self._logger,
187
+ self._slice_logger,
188
+ self._message_repository,
189
+ self._partition_reader,
190
+ )
191
+ handler.start_next_partition_generator()
192
+
193
+ sentinel = PartitionCompleteSentinel(partition)
194
+
195
+ self._message_repository.consume_queue.return_value = [
196
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
197
+ ]
198
+
199
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
200
+
201
+ expected_messages = [
202
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
203
+ ]
204
+ assert expected_messages == messages
205
+
206
+ partition.close.assert_called_once()
207
+
208
+ @freezegun.freeze_time("2020-01-01T00:00:00")
209
+ def test_handle_on_partition_complete_sentinel_yields_status_message_if_the_stream_is_done(self):
210
+ self._streams_currently_generating_partitions = [self._another_stream]
211
+ stream_instances_to_read_from = [self._another_stream]
212
+ log_message = Mock(spec=LogMessage)
213
+ self._a_closed_partition.to_slice.return_value = log_message
214
+ self._message_repository.consume_queue.return_value = []
215
+
216
+ handler = ConcurrentReadProcessor(
217
+ stream_instances_to_read_from,
218
+ self._partition_enqueuer,
219
+ self._thread_pool_manager,
220
+ self._logger,
221
+ self._slice_logger,
222
+ self._message_repository,
223
+ self._partition_reader,
224
+ )
225
+ handler.start_next_partition_generator()
226
+ handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._another_stream))
227
+
228
+ sentinel = PartitionCompleteSentinel(self._a_closed_partition)
229
+
230
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
231
+
232
+ expected_messages = [
233
+ AirbyteMessage(
234
+ type=MessageType.TRACE,
235
+ trace=AirbyteTraceMessage(
236
+ type=TraceType.STREAM_STATUS,
237
+ stream_status=AirbyteStreamStatusTraceMessage(
238
+ stream_descriptor=StreamDescriptor(
239
+ name=_ANOTHER_STREAM_NAME,
240
+ ),
241
+ status=AirbyteStreamStatus.COMPLETE,
242
+ ),
243
+ emitted_at=1577836800000.0,
244
+ ),
245
+ )
246
+ ]
247
+ assert expected_messages == messages
248
+ self._a_closed_partition.close.assert_called_once()
249
+
250
+ @freezegun.freeze_time("2020-01-01T00:00:00")
251
+ def test_handle_on_partition_complete_sentinel_yields_no_status_message_if_the_stream_is_not_done(self):
252
+ stream_instances_to_read_from = [self._stream]
253
+ partition = Mock(spec=Partition)
254
+ log_message = Mock(spec=LogMessage)
255
+ partition.to_slice.return_value = log_message
256
+ partition.stream_name.return_value = _STREAM_NAME
257
+ partition.is_closed.return_value = True
258
+
259
+ handler = ConcurrentReadProcessor(
260
+ stream_instances_to_read_from,
261
+ self._partition_enqueuer,
262
+ self._thread_pool_manager,
263
+ self._logger,
264
+ self._slice_logger,
265
+ self._message_repository,
266
+ self._partition_reader,
267
+ )
268
+ handler.start_next_partition_generator()
269
+
270
+ sentinel = PartitionCompleteSentinel(partition)
271
+
272
+ messages = list(handler.on_partition_complete_sentinel(sentinel))
273
+
274
+ expected_messages = []
275
+ assert expected_messages == messages
276
+ partition.close.assert_called_once()
277
+
278
+ @freezegun.freeze_time("2020-01-01T00:00:00")
279
+ def test_on_record_no_status_message_no_repository_messge(self):
280
+ stream_instances_to_read_from = [self._stream]
281
+ partition = Mock(spec=Partition)
282
+ log_message = Mock(spec=LogMessage)
283
+ partition.to_slice.return_value = log_message
284
+ partition.stream_name.return_value = _STREAM_NAME
285
+ partition.is_closed.return_value = True
286
+ self._message_repository.consume_queue.return_value = []
287
+
288
+ handler = ConcurrentReadProcessor(
289
+ stream_instances_to_read_from,
290
+ self._partition_enqueuer,
291
+ self._thread_pool_manager,
292
+ self._logger,
293
+ self._slice_logger,
294
+ self._message_repository,
295
+ self._partition_reader,
296
+ )
297
+
298
+ # Simulate a first record
299
+ list(handler.on_record(self._record))
300
+
301
+ messages = list(handler.on_record(self._record))
302
+
303
+ expected_messages = [
304
+ AirbyteMessage(
305
+ type=MessageType.RECORD,
306
+ record=AirbyteRecordMessage(
307
+ stream=_STREAM_NAME,
308
+ data=self._record_data,
309
+ emitted_at=1577836800000,
310
+ ),
311
+ )
312
+ ]
313
+ assert expected_messages == messages
314
+
315
+ @freezegun.freeze_time("2020-01-01T00:00:00")
316
+ def test_on_record_with_repository_messge(self):
317
+ stream_instances_to_read_from = [self._stream]
318
+ partition = Mock(spec=Partition)
319
+ log_message = Mock(spec=LogMessage)
320
+ partition.to_slice.return_value = log_message
321
+ partition.stream_name.return_value = _STREAM_NAME
322
+ partition.is_closed.return_value = True
323
+ slice_logger = Mock(spec=SliceLogger)
324
+ slice_logger.should_log_slice_message.return_value = True
325
+ slice_logger.create_slice_log_message.return_value = log_message
326
+ self._message_repository.consume_queue.return_value = [
327
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
328
+ ]
329
+
330
+ handler = ConcurrentReadProcessor(
331
+ stream_instances_to_read_from,
332
+ self._partition_enqueuer,
333
+ self._thread_pool_manager,
334
+ self._logger,
335
+ self._slice_logger,
336
+ self._message_repository,
337
+ self._partition_reader,
338
+ )
339
+
340
+ stream = Mock(spec=AbstractStream)
341
+ stream.name = _STREAM_NAME
342
+ stream.as_airbyte_stream.return_value = AirbyteStream(
343
+ name=_STREAM_NAME,
344
+ json_schema={},
345
+ supported_sync_modes=[SyncMode.full_refresh],
346
+ )
347
+
348
+ # Simulate a first record
349
+ list(handler.on_record(self._record))
350
+
351
+ messages = list(handler.on_record(self._record))
352
+
353
+ expected_messages = [
354
+ AirbyteMessage(
355
+ type=MessageType.RECORD,
356
+ record=AirbyteRecordMessage(
357
+ stream=_STREAM_NAME,
358
+ data=self._record_data,
359
+ emitted_at=1577836800000,
360
+ ),
361
+ ),
362
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
363
+ ]
364
+ assert expected_messages == messages
365
+ assert handler._record_counter[_STREAM_NAME] == 2
366
+
367
+ @freezegun.freeze_time("2020-01-01T00:00:00")
368
+ def test_on_record_emits_status_message_on_first_record_no_repository_message(self):
369
+ self._streams_currently_generating_partitions = [_STREAM_NAME]
370
+ stream_instances_to_read_from = [self._stream]
371
+ partition = Mock(spec=Partition)
372
+ partition.stream_name.return_value = _STREAM_NAME
373
+ partition.is_closed.return_value = True
374
+
375
+ handler = ConcurrentReadProcessor(
376
+ stream_instances_to_read_from,
377
+ self._partition_enqueuer,
378
+ self._thread_pool_manager,
379
+ self._logger,
380
+ self._slice_logger,
381
+ self._message_repository,
382
+ self._partition_reader,
383
+ )
384
+
385
+ messages = list(handler.on_record(self._record))
386
+
387
+ expected_messages = [
388
+ AirbyteMessage(
389
+ type=MessageType.TRACE,
390
+ trace=AirbyteTraceMessage(
391
+ type=TraceType.STREAM_STATUS,
392
+ emitted_at=1577836800000.0,
393
+ stream_status=AirbyteStreamStatusTraceMessage(
394
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
395
+ ),
396
+ ),
397
+ ),
398
+ AirbyteMessage(
399
+ type=MessageType.RECORD,
400
+ record=AirbyteRecordMessage(
401
+ stream=_STREAM_NAME,
402
+ data=self._record_data,
403
+ emitted_at=1577836800000,
404
+ ),
405
+ ),
406
+ ]
407
+ assert expected_messages == messages
408
+
409
+ @freezegun.freeze_time("2020-01-01T00:00:00")
410
+ def test_on_record_emits_status_message_on_first_record_with_repository_message(self):
411
+ stream_instances_to_read_from = [self._stream]
412
+ partition = Mock(spec=Partition)
413
+ log_message = Mock(spec=LogMessage)
414
+ partition.to_slice.return_value = log_message
415
+ partition.stream_name.return_value = _STREAM_NAME
416
+ partition.is_closed.return_value = True
417
+ self._message_repository.consume_queue.return_value = [
418
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
419
+ ]
420
+
421
+ handler = ConcurrentReadProcessor(
422
+ stream_instances_to_read_from,
423
+ self._partition_enqueuer,
424
+ self._thread_pool_manager,
425
+ self._logger,
426
+ self._slice_logger,
427
+ self._message_repository,
428
+ self._partition_reader,
429
+ )
430
+
431
+ stream = Mock(spec=AbstractStream)
432
+ stream.name = _STREAM_NAME
433
+ stream.as_airbyte_stream.return_value = AirbyteStream(
434
+ name=_STREAM_NAME,
435
+ json_schema={},
436
+ supported_sync_modes=[SyncMode.full_refresh],
437
+ )
438
+
439
+ messages = list(handler.on_record(self._record))
440
+
441
+ expected_messages = [
442
+ AirbyteMessage(
443
+ type=MessageType.TRACE,
444
+ trace=AirbyteTraceMessage(
445
+ type=TraceType.STREAM_STATUS,
446
+ emitted_at=1577836800000.0,
447
+ stream_status=AirbyteStreamStatusTraceMessage(
448
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
449
+ ),
450
+ ),
451
+ ),
452
+ AirbyteMessage(
453
+ type=MessageType.RECORD,
454
+ record=AirbyteRecordMessage(
455
+ stream=_STREAM_NAME,
456
+ data=self._record_data,
457
+ emitted_at=1577836800000,
458
+ ),
459
+ ),
460
+ AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
461
+ ]
462
+ assert expected_messages == messages
463
+
464
+ @freezegun.freeze_time("2020-01-01T00:00:00")
465
+ def test_on_exception_stops_streams_and_raises_an_exception(self):
466
+ stream_instances_to_read_from = [self._stream, self._another_stream]
467
+
468
+ handler = ConcurrentReadProcessor(
469
+ stream_instances_to_read_from,
470
+ self._partition_enqueuer,
471
+ self._thread_pool_manager,
472
+ self._logger,
473
+ self._slice_logger,
474
+ self._message_repository,
475
+ self._partition_reader,
476
+ )
477
+ handler._streams_to_partitions = {_STREAM_NAME: {self._an_open_partition}, _ANOTHER_STREAM_NAME: {self._a_closed_partition}}
478
+
479
+ another_stream = Mock(spec=AbstractStream)
480
+ another_stream.name = _STREAM_NAME
481
+ another_stream.as_airbyte_stream.return_value = AirbyteStream(
482
+ name=_ANOTHER_STREAM_NAME,
483
+ json_schema={},
484
+ supported_sync_modes=[SyncMode.full_refresh],
485
+ )
486
+
487
+ exception = RuntimeError("Something went wrong")
488
+
489
+ messages = []
490
+
491
+ with self.assertRaises(RuntimeError):
492
+ for m in handler.on_exception(exception):
493
+ messages.append(m)
494
+
495
+ expected_message = [
496
+ AirbyteMessage(
497
+ type=MessageType.TRACE,
498
+ trace=AirbyteTraceMessage(
499
+ type=TraceType.STREAM_STATUS,
500
+ emitted_at=1577836800000.0,
501
+ stream_status=AirbyteStreamStatusTraceMessage(
502
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE)
503
+ ),
504
+ ),
505
+ )
506
+ ]
507
+
508
+ assert messages == expected_message
509
+ self._thread_pool_manager.shutdown.assert_called_once()
510
+
511
+ def test_is_done_is_false_if_there_are_any_instances_to_read_from(self):
512
+ stream_instances_to_read_from = [self._stream]
513
+
514
+ handler = ConcurrentReadProcessor(
515
+ stream_instances_to_read_from,
516
+ self._partition_enqueuer,
517
+ self._thread_pool_manager,
518
+ self._logger,
519
+ self._slice_logger,
520
+ self._message_repository,
521
+ self._partition_reader,
522
+ )
523
+
524
+ assert not handler.is_done()
525
+
526
+ def test_is_done_is_false_if_there_are_streams_still_generating_partitions(self):
527
+ stream_instances_to_read_from = [self._stream]
528
+
529
+ handler = ConcurrentReadProcessor(
530
+ stream_instances_to_read_from,
531
+ self._partition_enqueuer,
532
+ self._thread_pool_manager,
533
+ self._logger,
534
+ self._slice_logger,
535
+ self._message_repository,
536
+ self._partition_reader,
537
+ )
538
+
539
+ handler.start_next_partition_generator()
540
+
541
+ assert not handler.is_done()
542
+
543
+ def test_is_done_is_false_if_all_partitions_are_not_closed(self):
544
+ stream_instances_to_read_from = [self._stream]
545
+
546
+ handler = ConcurrentReadProcessor(
547
+ stream_instances_to_read_from,
548
+ self._partition_enqueuer,
549
+ self._thread_pool_manager,
550
+ self._logger,
551
+ self._slice_logger,
552
+ self._message_repository,
553
+ self._partition_reader,
554
+ )
555
+
556
+ handler.start_next_partition_generator()
557
+ handler.on_partition(self._an_open_partition)
558
+ handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._stream))
559
+
560
+ assert not handler.is_done()
561
+
562
+ def test_is_done_is_true_if_all_partitions_are_closed_and_no_streams_are_generating_partitions_and_none_are_still_to_run(self):
563
+ stream_instances_to_read_from = []
564
+
565
+ handler = ConcurrentReadProcessor(
566
+ stream_instances_to_read_from,
567
+ self._partition_enqueuer,
568
+ self._thread_pool_manager,
569
+ self._logger,
570
+ self._slice_logger,
571
+ self._message_repository,
572
+ self._partition_reader,
573
+ )
574
+
575
+ assert handler.is_done()
576
+
577
+ @freezegun.freeze_time("2020-01-01T00:00:00")
578
+ def test_start_next_partition_generator(self):
579
+ stream_instances_to_read_from = [self._stream]
580
+ handler = ConcurrentReadProcessor(
581
+ stream_instances_to_read_from,
582
+ self._partition_enqueuer,
583
+ self._thread_pool_manager,
584
+ self._logger,
585
+ self._slice_logger,
586
+ self._message_repository,
587
+ self._partition_reader,
588
+ )
589
+
590
+ status_message = handler.start_next_partition_generator()
591
+
592
+ assert status_message == AirbyteMessage(
593
+ type=MessageType.TRACE,
594
+ trace=AirbyteTraceMessage(
595
+ type=TraceType.STREAM_STATUS,
596
+ emitted_at=1577836800000.0,
597
+ stream_status=AirbyteStreamStatusTraceMessage(
598
+ stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.STARTED)
599
+ ),
600
+ ),
601
+ )
602
+
603
+ assert _STREAM_NAME in handler._streams_currently_generating_partitions
604
+ self._thread_pool_manager.submit.assert_called_with(self._partition_enqueuer.generate_partitions, self._stream)
@@ -33,7 +33,7 @@ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
33
33
 
34
34
 
35
35
  def _record(cursor_value: Comparable) -> Record:
36
- return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
36
+ return Record(data={_A_CURSOR_FIELD_KEY: cursor_value}, stream_name=_A_STREAM_NAME)
37
37
 
38
38
 
39
39
  class ConcurrentCursorTest(TestCase):