airbyte-cdk 0.53.9__py3-none-any.whl → 0.55.0__py3-none-any.whl
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +16 -4
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +14 -14
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +2 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +40 -28
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +33 -0
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +9 -2
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, Optional, Union
+from typing import Any, List, Mapping, MutableMapping, Optional, Union
 
 import requests
 from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
@@ -94,7 +94,7 @@ class DefaultPaginator(Paginator):
     page_size_option: Optional[RequestOption] = None
     page_token_option: Optional[Union[RequestPath, RequestOption]] = None
 
-    def __post_init__(self, parameters: Mapping[str, Any]):
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         if self.page_size_option and not self.pagination_strategy.get_page_size():
             raise ValueError("page_size_option cannot be set if the pagination strategy does not have a page_size")
         if isinstance(self.url_base, str):
@@ -108,10 +108,10 @@ class DefaultPaginator(Paginator):
         else:
             return None
 
-    def path(self):
+    def path(self) -> Optional[str]:
         if self._token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
             # Replace url base to only return the path
-            return str(self._token).replace(self.url_base.eval(self.config), "")
+            return str(self._token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None
 
@@ -121,7 +121,7 @@ class DefaultPaginator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> MutableMapping[str, Any]:
         return self._get_request_options(RequestOptionType.request_parameter)
 
     def get_request_headers(
@@ -151,11 +151,11 @@ class DefaultPaginator(Paginator):
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.body_json)
 
-    def reset(self):
+    def reset(self) -> None:
         self.pagination_strategy.reset()
-        self._token = None
+        self._token = self.pagination_strategy.initial_token
 
-    def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
+    def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
         options = {}
 
         if (
@@ -178,7 +178,7 @@ class PaginatorTestReadDecorator(Paginator):
 
     _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
 
-    def __init__(self, decorated, maximum_number_of_pages: int = 5):
+    def __init__(self, decorated: Paginator, maximum_number_of_pages: int = 5) -> None:
         if maximum_number_of_pages and maximum_number_of_pages < 1:
             raise ValueError(f"The maximum number of pages on a test read needs to be strictly positive. Got {maximum_number_of_pages}")
         self._maximum_number_of_pages = maximum_number_of_pages
@@ -192,7 +192,7 @@ class PaginatorTestReadDecorator(Paginator):
         self._page_count += 1
         return self._decorated.next_page_token(response, last_records)
 
-    def path(self):
+    def path(self) -> Optional[str]:
         return self._decorated.path()
 
     def get_request_params(
@@ -201,7 +201,7 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> MutableMapping[str, Any]:
         return self._decorated.get_request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_headers(
@@ -219,7 +219,7 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> Optional[Union[Mapping[str, Any], str]]:
        return self._decorated.get_request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_body_json(
@@ -228,9 +228,9 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> Optional[Mapping[str, Any]]:
         return self._decorated.get_request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
-    def reset(self):
+    def reset(self) -> None:
         self._decorated.reset()
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
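Two of the changes above are worth calling out. `reset()` now re-seeds `_token` from the strategy's `initial_token` instead of discarding it, so strategies that start from a non-null token resume from the right page after a reset. And the request-option getters now return `MutableMapping[str, Any]`, which lets callers add keys to the result without fighting the type checker. A minimal standalone sketch of that typing point (hypothetical values, not CDK code):

from typing import Any, MutableMapping


def get_request_params() -> MutableMapping[str, Any]:
    # Pagination-related query params for the next request (hypothetical values).
    return {"page": 2}


params = get_request_params()
params["per_page"] = 50  # mutation is allowed: the annotation promises a MutableMapping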
airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py
@@ -53,7 +53,7 @@ class RequestOptionsProvider:
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Union[Mapping, str]]:
+    ) -> Optional[Union[Mapping[str, Any], str]]:
         """
         Specifies how to populate the body of the request with a non-JSON payload.
 
@@ -71,7 +71,7 @@ class RequestOptionsProvider:
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Mapping]:
+    ) -> Optional[Mapping[str, Any]]:
         """
         Specifies how to populate the body of the request with a JSON payload.
 
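These signatures also explain the `Optional[Union[Mapping[str, Any], str]]` return for body data: a provider may hand back either a mapping to be form-encoded or an already-encoded string. A standalone sketch with a hypothetical provider function:

from typing import Any, Mapping, Optional, Union


def get_request_body_data(pre_encoded: bool) -> Optional[Union[Mapping[str, Any], str]]:
    # Hypothetical provider: returns a raw string when the payload is already encoded.
    if pre_encoded:
        return "grant_type=refresh_token&scope=read"
    return {"grant_type": "refresh_token", "scope": "read"}  # form-encoded by the HTTP layer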
airbyte_cdk/sources/streams/concurrent/abstract_stream.py
@@ -7,7 +7,7 @@ from typing import Any, Iterable, Mapping, Optional
 
 from airbyte_cdk.models import AirbyteStream
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
-from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from deprecated.classic import deprecated
 
 
@@ -37,10 +37,10 @@ class AbstractStream(ABC):
     """
 
     @abstractmethod
-    def read(self) -> Iterable[Record]:
+    def generate_partitions(self) -> Iterable[Partition]:
         """
-
-        :return:
+        Generates the partitions that will be read by this stream.
+        :return: An iterable of partitions.
         """
 
     @property
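The interface swap replaces record-level `read()` with partition generation, so a concurrent source can fan partitions out to worker threads instead of iterating records inline. A toy implementation of the new contract, with illustrative stand-in classes rather than the CDK's own:

from typing import Iterable, List


class DayPartition:
    """Stand-in for a Partition: one calendar day of data."""

    def __init__(self, day: str) -> None:
        self.day = day


class DailyStream:
    """Stand-in for an AbstractStream implementation."""

    def generate_partitions(self) -> Iterable[DayPartition]:
        # A real stream would derive these from stream slices and state.
        for day in ("2023-01-01", "2023-01-02"):
            yield DayPartition(day)


partitions: List[DayPartition] = list(DailyStream().generate_partitions())
assert [p.day for p in partitions] == ["2023-01-01", "2023-01-02"]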
airbyte_cdk/sources/streams/concurrent/adapters.py
@@ -22,11 +22,11 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
     StreamUnavailable,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
+from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
-from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
 from airbyte_cdk.sources.utils.slice_logger import SliceLogger
@@ -52,7 +52,6 @@ class StreamFacade(Stream):
         stream: Stream,
         source: AbstractSource,
         logger: logging.Logger,
-        max_workers: int,
         state: Optional[MutableMapping[str, Any]],
         cursor: Cursor,
     ) -> Stream:
@@ -73,28 +72,27 @@ class StreamFacade(Stream):
 
         message_repository = source.message_repository
         return StreamFacade(
-            ThreadBasedConcurrentStream(
+            DefaultStream(
                 partition_generator=StreamPartitionGenerator(
                     stream,
                     message_repository,
                     SyncMode.full_refresh if isinstance(cursor, NoopCursor) else SyncMode.incremental,
                     [cursor_field] if cursor_field is not None else None,
                     state,
+                    cursor,
                 ),
-                max_workers=max_workers,
                 name=stream.name,
                 namespace=stream.namespace,
                 json_schema=stream.get_json_schema(),
                 availability_strategy=StreamAvailabilityStrategy(stream, source),
                 primary_key=pk,
                 cursor_field=cursor_field,
-                slice_logger=source._slice_logger,
-                message_repository=message_repository,
                 logger=logger,
-                cursor=cursor,
             ),
             stream,
             cursor,
+            slice_logger=source._slice_logger,
+            logger=logger,
         )
 
     @property
@@ -132,13 +130,15 @@ class StreamFacade(Stream):
         else:
             return stream.cursor_field
 
-    def __init__(self, stream: AbstractStream, legacy_stream: Stream, cursor: Cursor):
+    def __init__(self, stream: AbstractStream, legacy_stream: Stream, cursor: Cursor, slice_logger: SliceLogger, logger: logging.Logger):
         """
         :param stream: The underlying AbstractStream
         """
         self._abstract_stream = stream
         self._legacy_stream = legacy_stream
         self._cursor = cursor
+        self._slice_logger = slice_logger
+        self._logger = logger
 
     def read_full_refresh(
         self,
@@ -177,8 +177,11 @@ class StreamFacade(Stream):
         yield from self._read_records()
 
     def _read_records(self) -> Iterable[StreamData]:
-        for record in self._abstract_stream.read():
-            yield record.data
+        for partition in self._abstract_stream.generate_partitions():
+            if self._slice_logger.should_log_slice_message(self._logger):
+                yield self._slice_logger.create_slice_log_message(partition.to_slice())
+            for record in partition.read():
+                yield record.data
 
     @property
     def name(self) -> str:
@@ -259,6 +262,7 @@ class StreamPartition(Partition):
         sync_mode: SyncMode,
         cursor_field: Optional[List[str]],
         state: Optional[MutableMapping[str, Any]],
+        cursor: Cursor,
     ):
         """
         :param stream: The stream to delegate to
@@ -271,6 +275,8 @@ class StreamPartition(Partition):
         self._sync_mode = sync_mode
         self._cursor_field = cursor_field
         self._state = state
+        self._cursor = cursor
+        self._is_closed = False
 
     def read(self) -> Iterable[Record]:
         """
@@ -294,7 +300,9 @@ class StreamPartition(Partition):
                 if isinstance(record_data, Mapping):
                     data_to_return = dict(record_data)
                     self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
-                    yield Record(data_to_return)
+                    record = Record(data_to_return, self._stream.name)
+                    self._cursor.observe(record)
+                    yield Record(data_to_return, self._stream.name)
                 else:
                     self._message_repository.emit_message(record_data)
         except Exception as e:
@@ -315,6 +323,16 @@ class StreamPartition(Partition):
         else:
             return hash(self._stream.name)
 
+    def stream_name(self) -> str:
+        return self._stream.name
+
+    def close(self) -> None:
+        self._cursor.close_partition(self)
+        self._is_closed = True
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
     def __repr__(self) -> str:
         return f"StreamPartition({self._stream.name}, {self._slice})"
 
@@ -334,6 +352,7 @@ class StreamPartitionGenerator(PartitionGenerator):
         sync_mode: SyncMode,
         cursor_field: Optional[List[str]],
         state: Optional[MutableMapping[str, Any]],
+        cursor: Cursor,
     ):
         """
         :param stream: The stream to delegate to
@@ -344,10 +363,13 @@ class StreamPartitionGenerator(PartitionGenerator):
         self._sync_mode = sync_mode
         self._cursor_field = cursor_field
         self._state = state
+        self._cursor = cursor
 
     def generate(self) -> Iterable[Partition]:
         for s in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
-            yield StreamPartition(self._stream, copy.deepcopy(s), self.message_repository, self._sync_mode, self._cursor_field, self._state)
+            yield StreamPartition(
+                self._stream, copy.deepcopy(s), self.message_repository, self._sync_mode, self._cursor_field, self._state, self._cursor
+            )
 
 
 @deprecated("This class is experimental. Use at your own risk.")
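The adapter now threads a `Cursor` through `StreamPartition`: each record is passed to `observe(...)` as it is yielded, and `close()` invokes `close_partition(...)` so state is committed once per partition. A toy cursor sketching that observe/close lifecycle (much simpler than the CDK's `Cursor`, which operates on `Record` objects):

from typing import Optional


class ToyCursor:
    """Tracks the max cursor value seen; 'commits' it when the partition closes."""

    def __init__(self) -> None:
        self._most_recent: Optional[int] = None
        self.committed_state: Optional[int] = None

    def observe(self, cursor_value: int) -> None:
        if self._most_recent is None or cursor_value > self._most_recent:
            self._most_recent = cursor_value

    def close_partition(self) -> None:
        self.committed_state = self._most_recent


cursor = ToyCursor()
for value in (3, 1, 7):
    cursor.observe(value)
cursor.close_partition()
assert cursor.committed_state == 7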
airbyte_cdk/sources/streams/concurrent/default_stream.py
@@ -0,0 +1,79 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from functools import lru_cache
+from logging import Logger
+from typing import Any, Iterable, List, Mapping, Optional
+
+from airbyte_cdk.models import AirbyteStream, SyncMode
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
+
+
+class DefaultStream(AbstractStream):
+    def __init__(
+        self,
+        partition_generator: PartitionGenerator,
+        name: str,
+        json_schema: Mapping[str, Any],
+        availability_strategy: AbstractAvailabilityStrategy,
+        primary_key: List[str],
+        cursor_field: Optional[str],
+        logger: Logger,
+        namespace: Optional[str] = None,
+    ) -> None:
+        self._stream_partition_generator = partition_generator
+        self._name = name
+        self._json_schema = json_schema
+        self._availability_strategy = availability_strategy
+        self._primary_key = primary_key
+        self._cursor_field = cursor_field
+        self._logger = logger
+        self._namespace = namespace
+
+    def generate_partitions(self) -> Iterable[Partition]:
+        yield from self._stream_partition_generator.generate()
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def check_availability(self) -> StreamAvailability:
+        return self._availability_strategy.check_availability(self._logger)
+
+    @property
+    def cursor_field(self) -> Optional[str]:
+        return self._cursor_field
+
+    @lru_cache(maxsize=None)
+    def get_json_schema(self) -> Mapping[str, Any]:
+        return self._json_schema
+
+    def as_airbyte_stream(self) -> AirbyteStream:
+        stream = AirbyteStream(name=self.name, json_schema=dict(self._json_schema), supported_sync_modes=[SyncMode.full_refresh])
+
+        if self._namespace:
+            stream.namespace = self._namespace
+
+        if self._cursor_field:
+            stream.source_defined_cursor = True
+            stream.supported_sync_modes.append(SyncMode.incremental)
+            stream.default_cursor_field = [self._cursor_field]
+
+        keys = self._primary_key
+        if keys and len(keys) > 0:
+            stream.source_defined_primary_key = [keys]
+
+        return stream
+
+    def log_stream_sync_configuration(self) -> None:
+        self._logger.debug(
+            f"Syncing stream instance: {self.name}",
+            extra={
+                "primary_key": self._primary_key,
+                "cursor_field": self.cursor_field,
+            },
+        )
airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py
@@ -4,8 +4,9 @@
 
 from queue import Queue
 
-from airbyte_cdk.sources.
-from airbyte_cdk.sources.streams.concurrent.
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
 
 
 class PartitionEnqueuer:
@@ -13,15 +14,14 @@ class PartitionEnqueuer:
     Generates partitions from a partition generator and puts them in a queue.
     """
 
-    def __init__(self, queue: Queue[QueueItem]
+    def __init__(self, queue: Queue[QueueItem]) -> None:
         """
         :param queue: The queue to put the partitions in.
         :param sentinel: The sentinel to put in the queue when all the partitions have been generated.
         """
         self._queue = queue
-        self._sentinel = sentinel
 
-    def generate_partitions(self,
+    def generate_partitions(self, stream: AbstractStream) -> None:
         """
         Generate partitions from a partition generator and put them in a queue.
         When all the partitions are added to the queue, a sentinel is added to the queue to indicate that all the partitions have been generated.
@@ -33,8 +33,8 @@ class PartitionEnqueuer:
         :return:
         """
         try:
-            for partition in
+            for partition in stream.generate_partitions():
                 self._queue.put(partition)
-            self._queue.put(
+            self._queue.put(PartitionGenerationCompletedSentinel(stream))
         except Exception as e:
             self._queue.put(e)
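The enqueuer follows a standard producer protocol: put every partition on the queue, then a completion sentinel, and on failure put the exception itself so the consumer can re-raise it. A self-contained sketch of the same protocol using plain objects in place of the CDK's sentinel types:

from queue import Queue
from typing import Any, List

DONE = object()  # stand-in for PartitionGenerationCompletedSentinel


def produce(queue: "Queue[Any]", items: List[int]) -> None:
    try:
        for item in items:
            queue.put(item)
        queue.put(DONE)  # signals that generation finished
    except Exception as e:
        queue.put(e)  # surface the failure to the consumer instead of swallowing it


q: "Queue[Any]" = Queue()
produce(q, [1, 2, 3])
while (item := q.get()) is not DONE:
    if isinstance(item, Exception):
        raise item
    print(item)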
airbyte_cdk/sources/streams/concurrent/partitions/partition.py
@@ -32,6 +32,29 @@ class Partition(ABC):
         """
         pass
 
+    @abstractmethod
+    def stream_name(self) -> str:
+        """
+        Returns the name of the stream that this partition is reading from.
+        :return: The name of the stream.
+        """
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Closes the partition.
+        """
+        pass
+
+    @abstractmethod
+    def is_closed(self) -> bool:
+        """
+        Returns whether the partition is closed.
+        :return:
+        """
+        pass
+
     @abstractmethod
     def __hash__(self) -> int:
         """
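A `Partition` now owns its lifecycle: consumers can ask which stream it belongs to, close it once it has been fully read, and check whether it was closed. A minimal in-memory implementation of that contract (illustrative, not a CDK class):

from typing import Any, Iterable, List, Mapping


class InMemoryPartition:
    def __init__(self, stream_name: str, rows: List[Mapping[str, Any]]) -> None:
        self._stream_name = stream_name
        self._rows = rows
        self._closed = False

    def stream_name(self) -> str:
        return self._stream_name

    def read(self) -> Iterable[Mapping[str, Any]]:
        yield from self._rows

    def close(self) -> None:
        # A real partition would also let its cursor commit state here.
        self._closed = True

    def is_closed(self) -> bool:
        return self._closed


p = InMemoryPartition("users", [{"id": 1}])
assert not p.is_closed()
list(p.read())
p.close()
assert p.is_closed()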
airbyte_cdk/sources/streams/concurrent/partitions/record.py
@@ -10,13 +10,14 @@ class Record:
     Represents a record read from a stream.
     """
 
-    def __init__(self, data: Mapping[str, Any]):
+    def __init__(self, data: Mapping[str, Any], stream_name: str):
         self.data = data
+        self.stream_name = stream_name
 
     def __eq__(self, other: Any) -> bool:
         if not isinstance(other, Record):
             return False
-        return self.data == other.data
+        return self.data == other.data and self.stream_name == other.stream_name
 
     def __repr__(self) -> str:
-        return f"Record(data={self.data})"
+        return f"Record(data={self.data}, stream_name={self.stream_name})"
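Because `stream_name` is now part of a record's identity, identical payloads read from different streams no longer compare equal, which matters once records from many streams share a single queue. With airbyte-cdk 0.55.0 installed:

from airbyte_cdk.sources.streams.concurrent.partitions.record import Record

a = Record({"id": 1}, "users")
b = Record({"id": 1}, "orders")
assert a != b  # same data, different stream
assert a == Record({"id": 1}, "users")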
airbyte_cdk/sources/streams/concurrent/partitions/types.py
@@ -4,11 +4,10 @@
 
 from typing import Union
 
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
 
-PARTITIONS_GENERATED_SENTINEL = object
-
 
 class PartitionCompleteSentinel:
     """
@@ -26,4 +25,4 @@ class PartitionCompleteSentinel:
 """
 Typedef representing the items that can be added to the ThreadBasedConcurrentStream
 """
-QueueItem = Union[Record, Partition, PartitionCompleteSentinel,
+QueueItem = Union[Record, Partition, PartitionCompleteSentinel, PartitionGenerationCompletedSentinel, Exception]
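A queue consumer dispatches on this union with `isinstance` checks; a simplified, runnable sketch of that dispatch (stand-in types, not the CDK's):

from typing import Union


class Done:  # stand-in for PartitionGenerationCompletedSentinel
    pass


QueueItem = Union[dict, Done, Exception]  # simplified mirror of the union above


def handle(item: QueueItem) -> str:
    if isinstance(item, Exception):
        raise item  # errors travel the queue like any other item
    if isinstance(item, Done):
        return "generation complete"
    return f"record: {item}"


assert handle(Done()) == "generation complete"
assert handle({"id": 1}) == "record: {'id': 1}"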
airbyte_cdk/sources/utils/slice_logger.py
@@ -12,6 +12,11 @@ from airbyte_cdk.models import Type as MessageType
 
 
 class SliceLogger(ABC):
+    """
+    SliceLogger is an interface that allows us to log slices of data in a uniform way.
+    It is responsible for determining whether or not a slice should be logged and for creating the log message.
+    """
+
     SLICE_LOG_PREFIX = "slice:"
 
     def create_slice_log_message(self, _slice: Optional[Mapping[str, Any]]) -> AirbyteMessage:
|