airbyte-cdk 0.53.9__py3-none-any.whl → 0.55.0__py3-none-any.whl
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +16 -4
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +14 -14
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +2 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +40 -28
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +33 -0
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +9 -2
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
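The new concurrent_source package replaces airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py (deleted above): it adds a ConcurrentSource entry point, a ConcurrentReadProcessor that turns queued partitions, records, and sentinels into Airbyte messages, and a ThreadPoolManager that owns the worker threads. The first hunk below is the new test file unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py. As a rough orientation, the sketch below illustrates the kind of dispatch loop those tests exercise; the read_all helper and the Queue wiring are assumptions for illustration only, not the shipped ConcurrentSource implementation.

# Hypothetical sketch of the dispatch loop exercised by test_concurrent_read_processor.py.
# read_all() and the Queue wiring are assumptions, not the shipped ConcurrentSource code.
from queue import Queue
from typing import Iterator

from airbyte_cdk.models import AirbyteMessage
from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel


def read_all(processor: ConcurrentReadProcessor, queue: Queue) -> Iterator[AirbyteMessage]:
    # Start generating partitions for the first stream (emits a STARTED trace message).
    yield processor.start_next_partition_generator()
    while not processor.is_done():
        item = queue.get()
        if isinstance(item, PartitionGenerationCompletedSentinel):
            yield from processor.on_partition_generation_completed(item)
        elif isinstance(item, Partition):
            # Schedules PartitionReader.process_partition(item) on the thread pool.
            processor.on_partition(item)
        elif isinstance(item, PartitionCompleteSentinel):
            yield from processor.on_partition_complete_sentinel(item)
        elif isinstance(item, Record):
            yield from processor.on_record(item)
        elif isinstance(item, Exception):
            # Emits INCOMPLETE stream statuses, shuts the pool down, then re-raises.
            yield from processor.on_exception(item)

The assertions in the test file pin down the same behaviour: on_partition submits PartitionReader.process_partition to the ThreadPoolManager, on_record emits a RUNNING status before a stream's first record, and on_exception yields INCOMPLETE statuses and shuts the pool down before re-raising.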
@@ -0,0 +1,604 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+import logging
+import unittest
+from unittest.mock import Mock
+
+import freezegun
+from airbyte_cdk.models import (
+    AirbyteLogMessage,
+    AirbyteMessage,
+    AirbyteRecordMessage,
+    AirbyteStream,
+    AirbyteStreamStatus,
+    AirbyteStreamStatusTraceMessage,
+    AirbyteTraceMessage,
+)
+from airbyte_cdk.models import Level as LogLevel
+from airbyte_cdk.models import StreamDescriptor, SyncMode, TraceType
+from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
+from airbyte_cdk.sources.message import LogMessage, MessageRepository
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
+from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
+from airbyte_cdk.sources.utils.slice_logger import SliceLogger
+
+_STREAM_NAME = "stream"
+_ANOTHER_STREAM_NAME = "stream2"
+
+
+class TestConcurrentReadProcessor(unittest.TestCase):
+    def setUp(self):
+        self._partition_enqueuer = Mock(spec=PartitionEnqueuer)
+        self._thread_pool_manager = Mock(spec=ThreadPoolManager)
+
+        self._an_open_partition = Mock(spec=Partition)
+        self._an_open_partition.is_closed.return_value = False
+        self._log_message = Mock(spec=LogMessage)
+        self._an_open_partition.to_slice.return_value = self._log_message
+        self._an_open_partition.stream_name.return_value = _STREAM_NAME
+
+        self._a_closed_partition = Mock(spec=Partition)
+        self._a_closed_partition.is_closed.return_value = True
+        self._a_closed_partition.stream_name.return_value = _ANOTHER_STREAM_NAME
+
+        self._logger = Mock(spec=logging.Logger)
+        self._slice_logger = Mock(spec=SliceLogger)
+        self._slice_logger.create_slice_log_message.return_value = self._log_message
+        self._message_repository = Mock(spec=MessageRepository)
+        self._message_repository.consume_queue.return_value = []
+        self._partition_reader = Mock(spec=PartitionReader)
+
+        self._stream = Mock(spec=AbstractStream)
+        self._stream.name = _STREAM_NAME
+        self._stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+        self._another_stream = Mock(spec=AbstractStream)
+        self._another_stream.name = _ANOTHER_STREAM_NAME
+        self._another_stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_ANOTHER_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        self._record_data = {"id": 1, "value": "A"}
+        self._record = Mock(spec=Record)
+        self._record.stream_name = _STREAM_NAME
+        self._record.data = self._record_data
+
+    def test_handle_partition_done_no_other_streams_to_generate_partitions_for(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition(self._an_open_partition)
+
+        sentinel = PartitionGenerationCompletedSentinel(self._stream)
+        messages = list(handler.on_partition_generation_completed(sentinel))
+
+        expected_messages = []
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_last_stream_partition_done(self):
+        stream_instances_to_read_from = [self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition(self._a_closed_partition)
+
+        sentinel = PartitionGenerationCompletedSentinel(self._another_stream)
+        messages = handler.on_partition_generation_completed(sentinel)
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_ANOTHER_STREAM_NAME),
+                        status=AirbyteStreamStatus(AirbyteStreamStatus.COMPLETE),
+                    ),
+                ),
+            )
+        ]
+        assert expected_messages == messages
+
+    def test_handle_partition(self):
+        stream_instances_to_read_from = [self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.on_partition(self._a_closed_partition)
+
+        self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._a_closed_partition)
+        assert self._a_closed_partition in handler._streams_to_partitions[_ANOTHER_STREAM_NAME]
+
+    def test_handle_partition_emits_log_message_if_it_should_be_logged(self):
+        stream_instances_to_read_from = [self._stream]
+        self._slice_logger = Mock(spec=SliceLogger)
+        self._slice_logger.should_log_slice_message.return_value = True
+        self._slice_logger.create_slice_log_message.return_value = self._log_message
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.on_partition(self._an_open_partition)
+
+        self._thread_pool_manager.submit.assert_called_with(self._partition_reader.process_partition, self._an_open_partition)
+        self._message_repository.emit_message.assert_called_with(self._log_message)
+        assert self._an_open_partition in handler._streams_to_partitions[_STREAM_NAME]
+
+    def test_handle_on_partition_complete_sentinel_with_messages_from_repository(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+
+        sentinel = PartitionCompleteSentinel(partition)
+
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+        assert expected_messages == messages
+
+        partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_on_partition_complete_sentinel_yields_status_message_if_the_stream_is_done(self):
+        self._streams_currently_generating_partitions = [self._another_stream]
+        stream_instances_to_read_from = [self._another_stream]
+        log_message = Mock(spec=LogMessage)
+        self._a_closed_partition.to_slice.return_value = log_message
+        self._message_repository.consume_queue.return_value = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+        handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._another_stream))
+
+        sentinel = PartitionCompleteSentinel(self._a_closed_partition)
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(
+                            name=_ANOTHER_STREAM_NAME,
+                        ),
+                        status=AirbyteStreamStatus.COMPLETE,
+                    ),
+                    emitted_at=1577836800000.0,
+                ),
+            )
+        ]
+        assert expected_messages == messages
+        self._a_closed_partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_handle_on_partition_complete_sentinel_yields_no_status_message_if_the_stream_is_not_done(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler.start_next_partition_generator()
+
+        sentinel = PartitionCompleteSentinel(partition)
+
+        messages = list(handler.on_partition_complete_sentinel(sentinel))
+
+        expected_messages = []
+        assert expected_messages == messages
+        partition.close.assert_called_once()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_no_status_message_no_repository_messge(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        self._message_repository.consume_queue.return_value = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        # Simulate a first record
+        list(handler.on_record(self._record))
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            )
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_with_repository_messge(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        slice_logger = Mock(spec=SliceLogger)
+        slice_logger.should_log_slice_message.return_value = True
+        slice_logger.create_slice_log_message.return_value = log_message
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        stream = Mock(spec=AbstractStream)
+        stream.name = _STREAM_NAME
+        stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        # Simulate a first record
+        list(handler.on_record(self._record))
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
+        ]
+        assert expected_messages == messages
+        assert handler._record_counter[_STREAM_NAME] == 2
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_emits_status_message_on_first_record_no_repository_message(self):
+        self._streams_currently_generating_partitions = [_STREAM_NAME]
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
+                    ),
+                ),
+            ),
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_record_emits_status_message_on_first_record_with_repository_message(self):
+        stream_instances_to_read_from = [self._stream]
+        partition = Mock(spec=Partition)
+        log_message = Mock(spec=LogMessage)
+        partition.to_slice.return_value = log_message
+        partition.stream_name.return_value = _STREAM_NAME
+        partition.is_closed.return_value = True
+        self._message_repository.consume_queue.return_value = [
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository"))
+        ]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        stream = Mock(spec=AbstractStream)
+        stream.name = _STREAM_NAME
+        stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        messages = list(handler.on_record(self._record))
+
+        expected_messages = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.RUNNING)
+                    ),
+                ),
+            ),
+            AirbyteMessage(
+                type=MessageType.RECORD,
+                record=AirbyteRecordMessage(
+                    stream=_STREAM_NAME,
+                    data=self._record_data,
+                    emitted_at=1577836800000,
+                ),
+            ),
+            AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=LogLevel.INFO, message="message emitted from the repository")),
+        ]
+        assert expected_messages == messages
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_on_exception_stops_streams_and_raises_an_exception(self):
+        stream_instances_to_read_from = [self._stream, self._another_stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+        handler._streams_to_partitions = {_STREAM_NAME: {self._an_open_partition}, _ANOTHER_STREAM_NAME: {self._a_closed_partition}}
+
+        another_stream = Mock(spec=AbstractStream)
+        another_stream.name = _STREAM_NAME
+        another_stream.as_airbyte_stream.return_value = AirbyteStream(
+            name=_ANOTHER_STREAM_NAME,
+            json_schema={},
+            supported_sync_modes=[SyncMode.full_refresh],
+        )
+
+        exception = RuntimeError("Something went wrong")
+
+        messages = []
+
+        with self.assertRaises(RuntimeError):
+            for m in handler.on_exception(exception):
+                messages.append(m)
+
+        expected_message = [
+            AirbyteMessage(
+                type=MessageType.TRACE,
+                trace=AirbyteTraceMessage(
+                    type=TraceType.STREAM_STATUS,
+                    emitted_at=1577836800000.0,
+                    stream_status=AirbyteStreamStatusTraceMessage(
+                        stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.INCOMPLETE)
+                    ),
+                ),
+            )
+        ]
+
+        assert messages == expected_message
+        self._thread_pool_manager.shutdown.assert_called_once()
+
+    def test_is_done_is_false_if_there_are_any_instances_to_read_from(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        assert not handler.is_done()
+
+    def test_is_done_is_false_if_there_are_streams_still_generating_partitions(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.start_next_partition_generator()
+
+        assert not handler.is_done()
+
+    def test_is_done_is_false_if_all_partitions_are_not_closed(self):
+        stream_instances_to_read_from = [self._stream]
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        handler.start_next_partition_generator()
+        handler.on_partition(self._an_open_partition)
+        handler.on_partition_generation_completed(PartitionGenerationCompletedSentinel(self._stream))
+
+        assert not handler.is_done()
+
+    def test_is_done_is_true_if_all_partitions_are_closed_and_no_streams_are_generating_partitions_and_none_are_still_to_run(self):
+        stream_instances_to_read_from = []
+
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        assert handler.is_done()
+
+    @freezegun.freeze_time("2020-01-01T00:00:00")
+    def test_start_next_partition_generator(self):
+        stream_instances_to_read_from = [self._stream]
+        handler = ConcurrentReadProcessor(
+            stream_instances_to_read_from,
+            self._partition_enqueuer,
+            self._thread_pool_manager,
+            self._logger,
+            self._slice_logger,
+            self._message_repository,
+            self._partition_reader,
+        )
+
+        status_message = handler.start_next_partition_generator()
+
+        assert status_message == AirbyteMessage(
+            type=MessageType.TRACE,
+            trace=AirbyteTraceMessage(
+                type=TraceType.STREAM_STATUS,
+                emitted_at=1577836800000.0,
+                stream_status=AirbyteStreamStatusTraceMessage(
+                    stream_descriptor=StreamDescriptor(name=_STREAM_NAME), status=AirbyteStreamStatus(AirbyteStreamStatus.STARTED)
+                ),
+            ),
+        )
+
+        assert _STREAM_NAME in handler._streams_currently_generating_partitions
+        self._thread_pool_manager.submit.assert_called_with(self._partition_enqueuer.generate_partitions, self._stream)
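The final hunk, from unit_tests/sources/streams/concurrent/test_cursor.py, tracks the updated Record signature (airbyte_cdk/sources/streams/concurrent/partitions/record.py above): a Record now also carries the name of the stream it belongs to, consistent with how the tests above key ConcurrentReadProcessor's per-stream record counter by the record's stream name.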
@@ -33,7 +33,7 @@ def _partition(_slice: Optional[Mapping[str, Any]]) -> Partition:
 
 
 def _record(cursor_value: Comparable) -> Record:
-    return Record(data={_A_CURSOR_FIELD_KEY: cursor_value})
+    return Record(data={_A_CURSOR_FIELD_KEY: cursor_value}, stream_name=_A_STREAM_NAME)
 
 
 class ConcurrentCursorTest(TestCase):