airbyte-cdk 0.53.9__py3-none-any.whl → 0.55.0__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +16 -4
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +14 -14
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +2 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +40 -28
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +33 -0
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +9 -2
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
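
The headline change in 0.55.0 is the removal of thread_based_concurrent_stream.py in favor of a new concurrent_source package: partition generation, partition reading, and thread-pool handling are now separate collaborators coordinated by a ConcurrentReadProcessor. The snippet below is a minimal sketch, not code from the package itself; it mirrors the wiring used by the new test file that follows (mocks stand in for real streams and queue plumbing) to show the constructor shape and the call that kicks off a sync.

import logging
from unittest.mock import Mock

from airbyte_cdk.models import AirbyteStream, SyncMode
from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
from airbyte_cdk.sources.message import MessageRepository
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
from airbyte_cdk.sources.utils.slice_logger import SliceLogger

# A stand-in stream, configured the same way the tests below configure theirs.
stream = Mock(spec=AbstractStream)
stream.name = "stream"
stream.as_airbyte_stream.return_value = AirbyteStream(
    name="stream", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]
)

processor = ConcurrentReadProcessor(
    [stream],                      # streams still waiting for partition generation
    Mock(spec=PartitionEnqueuer),  # generates partitions for a stream on a worker thread
    Mock(spec=ThreadPoolManager),  # owns the worker thread pool
    Mock(spec=logging.Logger),
    Mock(spec=SliceLogger),
    Mock(spec=MessageRepository),
    Mock(spec=PartitionReader),    # reads the records of a partition on a worker thread
)

# Starting the first stream submits partition generation to the thread pool and
# returns a STARTED stream-status trace message (see test_start_next_partition_generator below).
status_message = processor.start_next_partition_generator()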
unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py
@@ -0,0 +1,604 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+import logging
+import unittest
+from unittest.mock import Mock
+
+import freezegun
+from airbyte_cdk.models import (
+    AirbyteLogMessage,
+    AirbyteMessage,
+    AirbyteRecordMessage,
+    AirbyteStream,
+    AirbyteStreamStatus,
+    AirbyteStreamStatusTraceMessage,
+    AirbyteTraceMessage,
+)
+from airbyte_cdk.models import Level as LogLevel
+from airbyte_cdk.models import StreamDescriptor, SyncMode, TraceType
+from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
+from airbyte_cdk.sources.message import LogMessage, MessageRepository
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
+from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
+from airbyte_cdk.sources.utils.slice_logger import SliceLogger
+
+_STREAM_NAME = "stream"
+_ANOTHER_STREAM_NAME = "stream2"
+
+
+class TestConcurrentReadProcessor(unittest.TestCase):
+    def setUp(self):
+        self._partition_enqueuer = Mock(spec=PartitionEnqueuer)
+        self._thread_pool_manager = Mock(spec=ThreadPoolManager)
+
+        self._an_open_partition = Mock(spec=Partition)
+        self._an_open_partition.is_closed.return_value = False
+        self._log_message = Mock(spec=LogMessage)
+        self._an_open_partition.to_slice.return_value = self._log_message
+        self._an_open_partition.stream_name.return_value = _STREAM_NAME
+
+        self._a_closed_partition = Mock(spec=Partition)
+        self._a_closed_partition.is_closed.return_value = True
+        self._a_closed_partition.stream_name.return_value = _ANOTHER_STREAM_NAME
+
+        self._logger = Mock(spec=logging.Logger)
+        self._slice_logger = Mock(spec=SliceLogger)
+        self._slice_logger.create_slice_log_message.return_value = self._log_message
+        self._message_repository = Mock(spec=MessageRepository)
+        self._message_repository.consume_queue.return_value = []
+        self._partition_reader = Mock(spec=PartitionReader)
+
+        self._stream = Mock(spec=AbstractStream)
+        self._stream.name = _STREAM_NAME
+        self._stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+        self._another_stream = Mock(spec=AbstractStream)
+        self._another_stream.name = _ANOTHER_STREAM_NAME
+        self._another_stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_ANOTHER_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        self._record_data = {"id": 1, "value": "A"}
+        self._record = Mock(spec=Record)
+        self._record.stream_name = _STREAM_NAME
+        self._record.data = self._record_data
+
+    def test_handle_partition_done_no_other_streams_to_generate_partitions_for(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition(self._an_open_partition)
+
+        sentinel = PartitionGenerationCompletedSentinel(self._stream)
+        messages = list(handler.on_partition_generation_completed(sentinel))
+
+        expected_messages = []
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_last_stream_partition_done(self):
+        stream_instances_to_read_from = [self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition(self._a_closed_partition)
+
+        sentinel = PartitionGenerationCompletedSentinel(self._another_stream)
+        messages = handler.on_partition_generation_completed(sentinel)
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME),
+                        status=AirbyteStreamStatus(AirbyteStreamStatus.COMPLETE),
+                    ),
+                ),
+            )
+        ]
+        assert expected_messages == messages
+
+    def test_handle_partition(self):
+        stream_instances_to_read_from = [self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.on_partition(self._a_closed_partition)
+
+        self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._a_closed_partition)
+        assert self._a_closed_partition in handler._streams_to_partitions[_ANOTHER_STREAM_NAME]
+
+    def test_handle_partition_emits_log_message_if_it_should_be_logged(self):
+        stream_instances_to_read_from = [self._stream]
+        self._slice_logger = Mock(spec=SliceLogger)
+        self._slice_logger.should_log_slice_message.return_value = True
+        self._slice_logger.create_slice_log_message.return_value = self._log_message
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.on_partition(self._an_open_partition)
+
+        self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._an_open_partition)
+        self._message_repository.emit_message.assert_called_with(self._log_message)
+        assert self._an_open_partition in handler._streams_to_partitions[_STREAM_NAME]
+
+    def test_handle_on_partition_complete_sentinel_with_messages_from_repository(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+
+        sentinel = PartitionCompleteSentinel(partition)
+
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+        assert expected_messages == messages
+
+        partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_on_partition_complete_sentinel_yields_status_message_if_the_stream_is_done(self):
+        self._streams_currently_generating_partitions = [self._another_stream]
+        stream_instances_to_read_from = [self._another_stream]
+        log_message = Mock(spec=LogMessage)
+        self._a_closed_partition.to_slice.return_value = log_message
+        self._message_repository.consume_queue.return_value = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._another_stream))
+
+        sentinel = PartitionCompleteSentinel(self._a_closed_partition)
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(
+                            name=_ANOTHER_STREAM_NAME,
+                        ),
+                        status=AirbyteStreamStatus.COMPLETE,
+                    ),
+                    emitted_at=1577836800000.0,
+                ),
+            )
+        ]
+        assert expected_messages == messages
+        self._a_closed_partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_on_partition_complete_sentinel_yields_no_status_message_if_the_stream_is_not_done(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+
+        sentinel = PartitionCompleteSentinel(partition)
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = []
+        assert expected_messages == messages
+        partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_no_status_message_no_repository_messge(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        self._message_repository.consume_queue.return_value = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        # Simulate a first record
+        list(handler.on_record(self._record))
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            )
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_with_repository_messge(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        slice_logger = Mock(spec=SliceLogger)
+        slice_logger.should_log_slice_message.return_value = True
+        slice_logger.create_slice_log_message.return_value = log_message
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        stream = Mock(spec=AbstractStream)
+        stream.name = _STREAM_NAME
+        stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        # Simulate a first record
+        list(handler.on_record(self._record))
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
+        ]
+        assert expected_messages == messages
+        assert handler._record_counter[_STREAM_NAME] == 2
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_emits_status_message_on_first_record_no_repository_message(self):
+        self._streams_currently_generating_partitions = [_STREAM_NAME]
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
+                    ),
+                ),
+            ),
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_emits_status_message_on_first_record_with_repository_message(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        stream = Mock(spec=AbstractStream)
+        stream.name = _STREAM_NAME
+        stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
+                    ),
+                ),
+            ),
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_exception_stops_streams_and_raises_an_exception(self):
+        stream_instances_to_read_from = [self._stream, self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler._streams_to_partitions = {_STREAM_NAME: {self._an_open_partition}, _ANOTHER_STREAM_NAME: {self._a_closed_partition}}
+
+        another_stream = Mock(spec=AbstractStream)
+        another_stream.name = _STREAM_NAME
+        another_stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_ANOTHER_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        exception = RuntimeError("Something went wrong")
+
+        messages = []
+
+        with self.assertRaises(RuntimeError):
+            for m in handler.on_exception(exception):
+                messages.append(m)
+
+        expected_message = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE)
+                    ),
+                ),
+            )
+        ]
+
+        assert messages == expected_message
+        self._thread_pool_manager.shutdown.assert_called_once()
+
+    def test_is_done_is_false_if_there_are_any_instances_to_read_from(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        assert not handler.is_done()
+
+    def test_is_done_is_false_if_there_are_streams_still_generating_partitions(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.start_next_partition_generator()
+
+        assert not handler.is_done()
+
+    def test_is_done_is_false_if_all_partitions_are_not_closed(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.start_next_partition_generator()
+        handler.on_partition(self._an_open_partition)
+        handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._stream))
+
+        assert not handler.is_done()
+
+    def test_is_done_is_true_if_all_partitions_are_closed_and_no_streams_are_generating_partitions_and_none_are_still_to_run(self):
+        stream_instances_to_read_from = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        assert handler.is_done()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_start_next_partition_generator(self):
+        stream_instances_to_read_from = [self._stream]
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        status_message = handler.start_next_partition_generator()
+
+        assert status_message == AirbyteMessage(
+            type=MessageType.TRACE,
+            trace=AirbyteTraceMessage(
+                type=TraceType.STREAM_STATUS,
+                emitted_at=1577836800000.0,
+                stream_status=AirbyteStreamStatusTraceMessage(
+                    stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.STARTED)
+                ),
+            ),
+        )
+
+        assert _STREAM_NAME in handler._streams_currently_generating_partitions
+        self._thread_pool_manager.submit.assert_called_with(self._partition_enqueuer.generate_partitions, self._stream)
unit_tests/sources/streams/concurrent/test_cursor.py
@@ -33,7 +33,7 @@ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
 
 
 def _record(cursor_value: Comparable) -> Record:
-    return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
+    return Record(data={_A_CURSOR_FIELD_KEY: cursor_value}, stream_name=_A_STREAM_NAME)
 
 
 class ConcurrentCursorTest(TestCase):
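
The one-line test_cursor.py change above tracks an API change listed earlier in this diff (partitions/record.py +4 -3): a Record now carries the name of the stream it belongs to, which ConcurrentReadProcessor.on_record uses when building the emitted AirbyteRecordMessage. A minimal sketch of the new call shape, assuming airbyte-cdk 0.55.0 and a hypothetical stream name:

from airbyte_cdk.sources.streams.concurrent.partitions.record import Record

# In 0.53.9 a Record was built from its data alone; in 0.55.0 the stream name
# travels with the record, as in the updated test above.
record = Record(data={"id": 1, "value": "A"}, stream_name="stream")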