airbyte-cdk 0.53.9__py3-none-any.whl → 0.55.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/concurrent_source/__init__.py +3 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +190 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +161 -0
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +63 -0
- airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +17 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +97 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +16 -4
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +14 -14
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +2 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +4 -4
- airbyte_cdk/sources/streams/concurrent/adapters.py +34 -12
- airbyte_cdk/sources/streams/concurrent/default_stream.py +79 -0
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +7 -7
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +23 -0
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +4 -3
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +2 -3
- airbyte_cdk/sources/utils/slice_logger.py +5 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/RECORD +40 -28
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/__init__.py +3 -0
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +105 -0
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +33 -0
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +9 -2
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +14 -7
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +2 -3
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +44 -55
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +24 -15
- unit_tests/sources/streams/concurrent/test_adapters.py +52 -32
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +6 -5
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +604 -0
- unit_tests/sources/streams/concurrent/test_cursor.py +1 -1
- unit_tests/sources/streams/concurrent/{test_thread_based_concurrent_stream.py → test_default_stream.py} +7 -144
- unit_tests/sources/streams/concurrent/test_partition_reader.py +2 -2
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +98 -0
- unit_tests/sources/streams/test_stream_read.py +1 -2
- unit_tests/sources/test_concurrent_source.py +105 -0
- unit_tests/sources/test_source_read.py +461 -0
- airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +0 -221
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.53.9.dist-info → airbyte_cdk-0.55.0.dist-info}/top_level.txt +0 -0
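The headline change in this release is the extraction of a reusable concurrent-source layer: the per-stream `ThreadBasedConcurrentStream` is deleted and replaced by `DefaultStream` plus the new `airbyte_cdk/sources/concurrent_source/` package. Below is a sketch of the new import surface, assuming airbyte-cdk 0.55.0 is installed; the module paths come from the file list above, and every class name except `PartitionGenerationCompletedSentinel` (which appears verbatim in the hunks below) is inferred from its module name.

```python
# Hypothetical import sketch; class names other than
# PartitionGenerationCompletedSentinel are inferred from the module names.
from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import (
    PartitionGenerationCompletedSentinel,  # confirmed verbatim in the partition_enqueuer.py hunk
)
from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
```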
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py:

@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, Optional, Union
+from typing import Any, List, Mapping, MutableMapping, Optional, Union
 
 import requests
 from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
@@ -94,7 +94,7 @@ class DefaultPaginator(Paginator):
     page_size_option: Optional[RequestOption] = None
     page_token_option: Optional[Union[RequestPath, RequestOption]] = None
 
-    def __post_init__(self, parameters: Mapping[str, Any]):
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         if self.page_size_option and not self.pagination_strategy.get_page_size():
             raise ValueError("page_size_option cannot be set if the pagination strategy does not have a page_size")
         if isinstance(self.url_base, str):
@@ -108,10 +108,10 @@ class DefaultPaginator(Paginator):
         else:
             return None
 
-    def path(self):
+    def path(self) -> Optional[str]:
         if self._token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
             # Replace url base to only return the path
-            return str(self._token).replace(self.url_base.eval(self.config), "")
+            return str(self._token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None
 
@@ -121,7 +121,7 @@ class DefaultPaginator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) ->
+    ) -> MutableMapping[str, Any]:
         return self._get_request_options(RequestOptionType.request_parameter)
 
     def get_request_headers(
@@ -151,11 +151,11 @@ class DefaultPaginator(Paginator):
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.body_json)
 
-    def reset(self):
+    def reset(self) -> None:
         self.pagination_strategy.reset()
-        self._token =
+        self._token = self.pagination_strategy.initial_token
 
-    def _get_request_options(self, option_type: RequestOptionType) ->
+    def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
         options = {}
 
         if (
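Two behavioral notes in the hunk above: `reset()` now re-seeds `_token` from the pagination strategy's `initial_token`, and the request-option getters now advertise `MutableMapping` return types. A toy sketch of the reset contract follows (stand-in classes, not the CDK's):

```python
from typing import Any, Optional


class OffsetStrategy:
    """Toy pagination strategy with an initial token (stand-in, not the CDK's)."""

    initial_token: int = 0

    def __init__(self) -> None:
        self._offset = 0

    def next_page_token(self, page_size: int) -> Optional[int]:
        self._offset += page_size
        return self._offset

    def reset(self) -> None:
        self._offset = 0


class TinyPaginator:
    """Toy mirror of DefaultPaginator's new reset() behavior."""

    def __init__(self, strategy: OffsetStrategy) -> None:
        self._strategy = strategy
        self._token: Optional[Any] = strategy.initial_token

    def reset(self) -> None:
        # 0.55.0 behavior: reset the strategy, then re-seed the token from
        # the strategy's initial_token instead of a hard-coded value.
        self._strategy.reset()
        self._token = self._strategy.initial_token


paginator = TinyPaginator(OffsetStrategy())
paginator._token = 50  # pretend a read paged forward
paginator.reset()
assert paginator._token == 0
```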
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py (continued):

@@ -178,7 +178,7 @@ class PaginatorTestReadDecorator(Paginator):
 
     _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
 
-    def __init__(self, decorated, maximum_number_of_pages: int = 5):
+    def __init__(self, decorated: Paginator, maximum_number_of_pages: int = 5) -> None:
         if maximum_number_of_pages and maximum_number_of_pages < 1:
             raise ValueError(f"The maximum number of pages on a test read needs to be strictly positive. Got {maximum_number_of_pages}")
         self._maximum_number_of_pages = maximum_number_of_pages
@@ -192,7 +192,7 @@ class PaginatorTestReadDecorator(Paginator):
         self._page_count += 1
         return self._decorated.next_page_token(response, last_records)
 
-    def path(self):
+    def path(self) -> Optional[str]:
         return self._decorated.path()
 
     def get_request_params(
@@ -201,7 +201,7 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) ->
+    ) -> MutableMapping[str, Any]:
         return self._decorated.get_request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_headers(
@@ -219,7 +219,7 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> Optional[Union[Mapping[str, Any], str]]:
         return self._decorated.get_request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_body_json(
@@ -228,9 +228,9 @@ class PaginatorTestReadDecorator(Paginator):
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Mapping[str, Any]:
+    ) -> Optional[Mapping[str, Any]]:
         return self._decorated.get_request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
-    def reset(self):
+    def reset(self) -> None:
         self._decorated.reset()
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
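For context, `PaginatorTestReadDecorator` (now fully typed) caps how many pages a test read may fetch by wrapping another paginator. A self-contained toy showing the same decorator pattern:

```python
from typing import Optional


class CountingPaginator:
    """Toy paginator: returns page tokens 1, 2, 3, ... forever."""

    def __init__(self) -> None:
        self._page = 0

    def next_page_token(self) -> Optional[int]:
        self._page += 1
        return self._page


class MaxPagesDecorator:
    """Toy analogue of PaginatorTestReadDecorator: stop after N pages."""

    _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1

    def __init__(self, decorated: CountingPaginator, maximum_number_of_pages: int = 5) -> None:
        if maximum_number_of_pages and maximum_number_of_pages < 1:
            raise ValueError(f"The maximum number of pages on a test read needs to be strictly positive. Got {maximum_number_of_pages}")
        self._maximum_number_of_pages = maximum_number_of_pages
        self._decorated = decorated
        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL

    def next_page_token(self) -> Optional[int]:
        # Returning None signals "no more pages" to the caller.
        if self._page_count >= self._maximum_number_of_pages:
            return None
        self._page_count += 1
        return self._decorated.next_page_token()


paginator = MaxPagesDecorator(CountingPaginator(), maximum_number_of_pages=2)
tokens = []
while (token := paginator.next_page_token()) is not None:
    tokens.append(token)
assert tokens == [1]  # only one further page is fetched after the first
```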
airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py:

@@ -53,7 +53,7 @@ class RequestOptionsProvider:
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Union[Mapping, str]]:
+    ) -> Optional[Union[Mapping[str, Any], str]]:
         """
         Specifies how to populate the body of the request with a non-JSON payload.
 
@@ -71,7 +71,7 @@ class RequestOptionsProvider:
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Mapping]:
+    ) -> Optional[Mapping[str, Any]]:
         """
         Specifies how to populate the body of the request with a JSON payload.
 
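The interface change here is purely in the annotations: bare `Mapping` becomes `Mapping[str, Any]`. A hypothetical provider whose overrides satisfy the tightened signatures (`StreamState`/`StreamSlice` simplified to plain mappings for the sketch):

```python
from typing import Any, Mapping, Optional, Union


class MyRequestOptionsProvider:
    """Hypothetical provider with signatures matching the new annotations."""

    def get_request_body_data(
        self,
        *,
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Optional[Union[Mapping[str, Any], str]]:
        # A non-JSON payload may be a mapping (form-encoded) or a raw string.
        return "a=1&b=2"

    def get_request_body_json(
        self,
        *,
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Optional[Mapping[str, Any]]:
        return {"page_token": (next_page_token or {}).get("token")}


assert MyRequestOptionsProvider().get_request_body_json(next_page_token={"token": "abc"}) == {"page_token": "abc"}
```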
airbyte_cdk/sources/streams/concurrent/abstract_stream.py:

@@ -7,7 +7,7 @@ from typing import Any, Iterable, Mapping, Optional
 
 from airbyte_cdk.models import AirbyteStream
 from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
-from airbyte_cdk.sources.streams.concurrent.partitions.
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from deprecated.classic import deprecated
 
 
@@ -37,10 +37,10 @@ class AbstractStream(ABC):
     """
 
     @abstractmethod
-    def
+    def generate_partitions(self) -> Iterable[Partition]:
         """
-
-        :return:
+        Generates the partitions that will be read by this stream.
+        :return: An iterable of partitions.
         """
 
     @property
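`generate_partitions()` is now the abstract entry point of `AbstractStream`. A minimal toy illustrating the contract (stand-in classes, not the CDK's):

```python
from typing import Any, Iterable, Mapping


class DayPartition:
    """Toy partition covering one day of data (stand-in, not the CDK's)."""

    def __init__(self, day: str) -> None:
        self._day = day

    def to_slice(self) -> Mapping[str, Any]:
        return {"day": self._day}


class EventsStream:
    """Toy stream showing the generate_partitions() contract."""

    def generate_partitions(self) -> Iterable[DayPartition]:
        # One partition per day; a real stream would derive these from
        # its slice definition.
        for day in ("2023-11-01", "2023-11-02"):
            yield DayPartition(day)


assert [p.to_slice() for p in EventsStream().generate_partitions()] == [
    {"day": "2023-11-01"},
    {"day": "2023-11-02"},
]
```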
airbyte_cdk/sources/streams/concurrent/adapters.py:

@@ -22,11 +22,11 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
     StreamUnavailable,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
+from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
-from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
 from airbyte_cdk.sources.utils.slice_logger import SliceLogger
@@ -52,7 +52,6 @@ class StreamFacade(Stream):
         stream: Stream,
         source: AbstractSource,
         logger: logging.Logger,
-        max_workers: int,
         state: Optional[MutableMapping[str, Any]],
         cursor: Cursor,
     ) -> Stream:
@@ -73,28 +72,27 @@ class StreamFacade(Stream):
 
         message_repository = source.message_repository
         return StreamFacade(
-            ThreadBasedConcurrentStream(
+            DefaultStream(
                 partition_generator=StreamPartitionGenerator(
                     stream,
                     message_repository,
                     SyncMode.full_refresh if isinstance(cursor, NoopCursor) else SyncMode.incremental,
                     [cursor_field] if cursor_field is not None else None,
                     state,
+                    cursor,
                 ),
-                max_workers=max_workers,
                 name=stream.name,
                 namespace=stream.namespace,
                 json_schema=stream.get_json_schema(),
                 availability_strategy=StreamAvailabilityStrategy(stream, source),
                 primary_key=pk,
                 cursor_field=cursor_field,
-                slice_logger=source._slice_logger,
-                message_repository=message_repository,
                 logger=logger,
-                cursor=cursor,
             ),
             stream,
             cursor,
+            slice_logger=source._slice_logger,
+            logger=logger,
         )
 
     @property
@@ -132,13 +130,15 @@ class StreamFacade(Stream):
         else:
             return stream.cursor_field
 
-    def __init__(self, stream: AbstractStream, legacy_stream: Stream, cursor: Cursor):
+    def __init__(self, stream: AbstractStream, legacy_stream: Stream, cursor: Cursor, slice_logger: SliceLogger, logger: logging.Logger):
         """
         :param stream: The underlying AbstractStream
         """
         self._abstract_stream = stream
         self._legacy_stream = legacy_stream
         self._cursor = cursor
+        self._slice_logger = slice_logger
+        self._logger = logger
 
     def read_full_refresh(
         self,
@@ -177,8 +177,11 @@ class StreamFacade(Stream):
         yield from self._read_records()
 
     def _read_records(self) -> Iterable[StreamData]:
-        for
-
+        for partition in self._abstract_stream.generate_partitions():
+            if self._slice_logger.should_log_slice_message(self._logger):
+                yield self._slice_logger.create_slice_log_message(partition.to_slice())
+            for record in partition.read():
+                yield record.data
 
     @property
     def name(self) -> str:
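With `ThreadBasedConcurrentStream` gone, the facade now drives partitions itself: generate partitions, optionally emit a slice log message, then yield each record's data. A toy re-creation of that control flow (all classes here are stand-ins, not CDK classes):

```python
import logging
from dataclasses import dataclass
from typing import Any, Iterable, List, Mapping


@dataclass
class Rec:
    data: Mapping[str, Any]


class Part:
    def __init__(self, day: str, rows: List[Mapping[str, Any]]) -> None:
        self._day, self._rows = day, rows

    def to_slice(self) -> Mapping[str, Any]:
        return {"day": self._day}

    def read(self) -> Iterable[Rec]:
        return (Rec(r) for r in self._rows)


class DebugOnlySliceLogger:
    def should_log_slice_message(self, logger: logging.Logger) -> bool:
        return logger.isEnabledFor(logging.DEBUG)

    def create_slice_log_message(self, _slice: Mapping[str, Any]) -> str:
        return f"slice:{_slice}"


def read_records(partitions: Iterable[Part], slice_logger: DebugOnlySliceLogger, logger: logging.Logger) -> Iterable[Any]:
    # Mirrors StreamFacade._read_records in 0.55.0: emit an optional slice
    # log message, then yield each record's data.
    for partition in partitions:
        if slice_logger.should_log_slice_message(logger):
            yield slice_logger.create_slice_log_message(partition.to_slice())
        for record in partition.read():
            yield record.data


logger = logging.getLogger("demo")
out = list(read_records([Part("2023-11-01", [{"id": 1}])], DebugOnlySliceLogger(), logger))
assert out == [{"id": 1}]  # no slice message unless DEBUG logging is enabled
```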
airbyte_cdk/sources/streams/concurrent/adapters.py (continued):

@@ -259,6 +262,7 @@ class StreamPartition(Partition):
         sync_mode: SyncMode,
         cursor_field: Optional[List[str]],
         state: Optional[MutableMapping[str, Any]],
+        cursor: Cursor,
     ):
         """
         :param stream: The stream to delegate to
@@ -271,6 +275,8 @@ class StreamPartition(Partition):
         self._sync_mode = sync_mode
         self._cursor_field = cursor_field
         self._state = state
+        self._cursor = cursor
+        self._is_closed = False
 
     def read(self) -> Iterable[Record]:
         """
@@ -294,7 +300,9 @@ class StreamPartition(Partition):
                 if isinstance(record_data, Mapping):
                     data_to_return = dict(record_data)
                     self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
-
+                    record = Record(data_to_return, self._stream.name)
+                    self._cursor.observe(record)
+                    yield Record(data_to_return, self._stream.name)
                 else:
                     self._message_repository.emit_message(record_data)
         except Exception as e:
@@ -315,6 +323,16 @@ class StreamPartition(Partition):
         else:
             return hash(self._stream.name)
 
+    def stream_name(self) -> str:
+        return self._stream.name
+
+    def close(self) -> None:
+        self._cursor.close_partition(self)
+        self._is_closed = True
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
     def __repr__(self) -> str:
         return f"StreamPartition({self._stream.name}, {self._slice})"
 
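The hunks above give partitions a lifecycle: the cursor observes every record during `read()`, and `close()` tells the cursor the partition is done before marking it closed. Toy stand-ins sketching that interplay:

```python
from typing import Any, Iterable, List, Mapping


class MaxIdCursor:
    """Toy cursor: tracks the highest id seen and which partitions closed."""

    def __init__(self) -> None:
        self.max_id = 0
        self.closed_partitions: List[Any] = []

    def observe(self, record: Mapping[str, Any]) -> None:
        self.max_id = max(self.max_id, record["id"])

    def close_partition(self, partition: Any) -> None:
        self.closed_partitions.append(partition)


class ToyPartition:
    def __init__(self, rows: List[Mapping[str, Any]], cursor: MaxIdCursor) -> None:
        self._rows, self._cursor = rows, cursor
        self._is_closed = False

    def read(self) -> Iterable[Mapping[str, Any]]:
        for row in self._rows:
            self._cursor.observe(row)  # cursor sees every record as it is read
            yield row

    def close(self) -> None:
        # Mirrors StreamPartition.close(): notify the cursor, then mark closed.
        self._cursor.close_partition(self)
        self._is_closed = True

    def is_closed(self) -> bool:
        return self._is_closed


cursor = MaxIdCursor()
partition = ToyPartition([{"id": 3}, {"id": 7}], cursor)
list(partition.read())
partition.close()
assert cursor.max_id == 7 and partition.is_closed()
```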
airbyte_cdk/sources/streams/concurrent/adapters.py (continued):

@@ -334,6 +352,7 @@ class StreamPartitionGenerator(PartitionGenerator):
         sync_mode: SyncMode,
         cursor_field: Optional[List[str]],
         state: Optional[MutableMapping[str, Any]],
+        cursor: Cursor,
     ):
         """
         :param stream: The stream to delegate to
@@ -344,10 +363,13 @@ class StreamPartitionGenerator(PartitionGenerator):
         self._sync_mode = sync_mode
         self._cursor_field = cursor_field
         self._state = state
+        self._cursor = cursor
 
     def generate(self) -> Iterable[Partition]:
         for s in self._stream.stream_slices(sync_mode=self._sync_mode, cursor_field=self._cursor_field, stream_state=self._state):
-            yield StreamPartition(
+            yield StreamPartition(
+                self._stream, copy.deepcopy(s), self.message_repository, self._sync_mode, self._cursor_field, self._state, self._cursor
+            )
 
 
 @deprecated("This class is experimental. Use at your own risk.")
airbyte_cdk/sources/streams/concurrent/default_stream.py (new file):

@@ -0,0 +1,79 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from functools import lru_cache
+from logging import Logger
+from typing import Any, Iterable, List, Mapping, Optional
+
+from airbyte_cdk.models import AirbyteStream, SyncMode
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
+
+
+class DefaultStream(AbstractStream):
+    def __init__(
+        self,
+        partition_generator: PartitionGenerator,
+        name: str,
+        json_schema: Mapping[str, Any],
+        availability_strategy: AbstractAvailabilityStrategy,
+        primary_key: List[str],
+        cursor_field: Optional[str],
+        logger: Logger,
+        namespace: Optional[str] = None,
+    ) -> None:
+        self._stream_partition_generator = partition_generator
+        self._name = name
+        self._json_schema = json_schema
+        self._availability_strategy = availability_strategy
+        self._primary_key = primary_key
+        self._cursor_field = cursor_field
+        self._logger = logger
+        self._namespace = namespace
+
+    def generate_partitions(self) -> Iterable[Partition]:
+        yield from self._stream_partition_generator.generate()
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def check_availability(self) -> StreamAvailability:
+        return self._availability_strategy.check_availability(self._logger)
+
+    @property
+    def cursor_field(self) -> Optional[str]:
+        return self._cursor_field
+
+    @lru_cache(maxsize=None)
+    def get_json_schema(self) -> Mapping[str, Any]:
+        return self._json_schema
+
+    def as_airbyte_stream(self) -> AirbyteStream:
+        stream = AirbyteStream(name=self.name, json_schema=dict(self._json_schema), supported_sync_modes=[SyncMode.full_refresh])
+
+        if self._namespace:
+            stream.namespace = self._namespace
+
+        if self._cursor_field:
+            stream.source_defined_cursor = True
+            stream.supported_sync_modes.append(SyncMode.incremental)
+            stream.default_cursor_field = [self._cursor_field]
+
+        keys = self._primary_key
+        if keys and len(keys) > 0:
+            stream.source_defined_primary_key = [keys]
+
+        return stream
+
+    def log_stream_sync_configuration(self) -> None:
+        self._logger.debug(
+            f"Syncing stream instance: {self.name}",
+            extra={
+                "primary_key": self._primary_key,
+                "cursor_field": self.cursor_field,
+            },
+        )
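A usage sketch for the new class, assuming airbyte-cdk 0.55.0 is installed. The import paths and the `DefaultStream` constructor come straight from the hunk above; `StreamAvailable` and its no-argument constructor are assumptions about the availability_strategy module:

```python
import logging
from typing import Iterable

from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
    AbstractAvailabilityStrategy,
    StreamAvailability,
    StreamAvailable,  # assumed to exist alongside StreamUnavailable
)
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator


class EmptyPartitionGenerator(PartitionGenerator):
    def generate(self) -> Iterable[Partition]:
        yield from ()


class AlwaysAvailable(AbstractAvailabilityStrategy):
    def check_availability(self, logger: logging.Logger) -> StreamAvailability:
        return StreamAvailable()  # assumed no-arg constructor


stream = DefaultStream(
    partition_generator=EmptyPartitionGenerator(),
    name="users",
    json_schema={"type": "object"},
    availability_strategy=AlwaysAvailable(),
    primary_key=["id"],
    cursor_field="updated_at",
    logger=logging.getLogger("airbyte"),
)

airbyte_stream = stream.as_airbyte_stream()
# cursor_field is set, so both sync modes are advertised.
assert [mode.value for mode in airbyte_stream.supported_sync_modes] == ["full_refresh", "incremental"]
```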
airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py:

@@ -4,8 +4,9 @@
 
 from queue import Queue
 
-from airbyte_cdk.sources.
-from airbyte_cdk.sources.streams.concurrent.
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
+from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
 
 
 class PartitionEnqueuer:
@@ -13,15 +14,14 @@ class PartitionEnqueuer:
     Generates partitions from a partition generator and puts them in a queue.
     """
 
-    def __init__(self, queue: Queue[QueueItem]
+    def __init__(self, queue: Queue[QueueItem]) -> None:
         """
         :param queue: The queue to put the partitions in.
         :param sentinel: The sentinel to put in the queue when all the partitions have been generated.
         """
         self._queue = queue
-        self._sentinel = sentinel
 
-    def generate_partitions(self,
+    def generate_partitions(self, stream: AbstractStream) -> None:
         """
         Generate partitions from a partition generator and put them in a queue.
         When all the partitions are added to the queue, a sentinel is added to the queue to indicate that all the partitions have been generated.
@@ -33,8 +33,8 @@ class PartitionEnqueuer:
         :return:
         """
         try:
-            for partition in
+            for partition in stream.generate_partitions():
                 self._queue.put(partition)
-            self._queue.put(
+            self._queue.put(PartitionGenerationCompletedSentinel(stream))
         except Exception as e:
             self._queue.put(e)
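The enqueuer protocol in the hunk above: partitions go on the queue, then a completion sentinel naming the stream, and any exception is forwarded on the same queue. A stdlib-only toy of the same producer/consumer handshake (stand-in classes, not the CDK's):

```python
import queue
import threading
from typing import Any, Iterable, Mapping


class DoneSentinel:
    """Toy analogue of PartitionGenerationCompletedSentinel."""

    def __init__(self, stream: Any) -> None:
        self.stream = stream


class ToyStream:
    name = "events"

    def generate_partitions(self) -> Iterable[Mapping[str, Any]]:
        yield {"day": "2023-11-01"}
        yield {"day": "2023-11-02"}


def enqueue_partitions(stream: ToyStream, q: "queue.Queue[Any]") -> None:
    try:
        for partition in stream.generate_partitions():
            q.put(partition)
        q.put(DoneSentinel(stream))  # signal: no more partitions for this stream
    except Exception as e:
        q.put(e)  # mirror the CDK: surface errors via the queue


q: "queue.Queue[Any]" = queue.Queue()
producer = threading.Thread(target=enqueue_partitions, args=(ToyStream(), q))
producer.start()
items = [q.get() for _ in range(3)]
producer.join()
assert isinstance(items[-1], DoneSentinel)
```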
airbyte_cdk/sources/streams/concurrent/partitions/partition.py:

@@ -32,6 +32,29 @@ class Partition(ABC):
         """
         pass
 
+    @abstractmethod
+    def stream_name(self) -> str:
+        """
+        Returns the name of the stream that this partition is reading from.
+        :return: The name of the stream.
+        """
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Closes the partition.
+        """
+        pass
+
+    @abstractmethod
+    def is_closed(self) -> bool:
+        """
+        Returns whether the partition is closed.
+        :return:
+        """
+        pass
+
     @abstractmethod
     def __hash__(self) -> int:
         """
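A minimal concrete `Partition`, assuming airbyte-cdk 0.55.0 is installed. `read()` and `to_slice()` already existed on the ABC (the `to_slice` return type shown is an assumption); `stream_name()`, `close()`, and `is_closed()` are the methods added in this release:

```python
from typing import Any, Iterable, List, Mapping, Optional

from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record


class ListPartition(Partition):
    """Serves a fixed list of rows as one unsliced partition."""

    def __init__(self, stream_name: str, rows: List[Mapping[str, Any]]) -> None:
        self._stream_name = stream_name
        self._rows = rows
        self._is_closed = False

    def read(self) -> Iterable[Record]:
        for row in self._rows:
            yield Record(row, self._stream_name)

    def to_slice(self) -> Optional[Mapping[str, Any]]:
        return None  # a single, unsliced partition

    def stream_name(self) -> str:
        return self._stream_name

    def close(self) -> None:
        # A real partition would also flush cursor state here.
        self._is_closed = True

    def is_closed(self) -> bool:
        return self._is_closed

    def __hash__(self) -> int:
        return hash(self._stream_name)
```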
airbyte_cdk/sources/streams/concurrent/partitions/record.py:

@@ -10,13 +10,14 @@ class Record:
     Represents a record read from a stream.
     """
 
-    def __init__(self, data: Mapping[str, Any]):
+    def __init__(self, data: Mapping[str, Any], stream_name: str):
         self.data = data
+        self.stream_name = stream_name
 
     def __eq__(self, other: Any) -> bool:
         if not isinstance(other, Record):
             return False
-        return self.data == other.data
+        return self.data == other.data and self.stream_name == other.stream_name
 
     def __repr__(self) -> str:
-        return f"Record(data={self.data})"
+        return f"Record(data={self.data}, stream_name={self.stream_name})"
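With airbyte-cdk 0.55.0 installed, equality now also compares `stream_name`, so identical payloads read from different streams no longer compare equal:

```python
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record

assert Record({"id": 1}, "users") == Record({"id": 1}, "users")
assert Record({"id": 1}, "users") != Record({"id": 1}, "orders")
```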
airbyte_cdk/sources/streams/concurrent/partitions/types.py:

@@ -4,11 +4,10 @@
 
 from typing import Union
 
+from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
 
-PARTITIONS_GENERATED_SENTINEL = object
-
 
 class PartitionCompleteSentinel:
     """
@@ -26,4 +25,4 @@ class PartitionCompleteSentinel:
     """
     Typedef representing the items that can be added to the ThreadBasedConcurrentStream
     """
-QueueItem = Union[Record, Partition, PartitionCompleteSentinel,
+QueueItem = Union[Record, Partition, PartitionCompleteSentinel, PartitionGenerationCompletedSentinel, Exception]
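The bare `PARTITIONS_GENERATED_SENTINEL = object` is replaced by a proper `PartitionGenerationCompletedSentinel` class, and `QueueItem` now enumerates everything a consumer can pull off the queue. A hedged sketch (not the CDK's `ConcurrentReadProcessor`) of how a consumer might dispatch on that union:

```python
import queue
from typing import Any, Iterator


class PartitionCompleteSentinel:  # toy stand-in
    pass


class PartitionGenerationCompletedSentinel:  # toy stand-in
    pass


def drain(q: "queue.Queue[Any]") -> Iterator[Any]:
    # Dispatch on the QueueItem union: exceptions re-raise, the generation
    # sentinel ends the loop, partition-complete sentinels are bookkeeping,
    # everything else (a Record or a Partition) is passed along.
    while True:
        item = q.get()
        if isinstance(item, Exception):
            raise item
        if isinstance(item, PartitionGenerationCompletedSentinel):
            return
        if isinstance(item, PartitionCompleteSentinel):
            continue
        yield item


q: "queue.Queue[Any]" = queue.Queue()
q.put({"id": 1})
q.put(PartitionCompleteSentinel())
q.put(PartitionGenerationCompletedSentinel())
assert list(drain(q)) == [{"id": 1}]
```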
airbyte_cdk/sources/utils/slice_logger.py:

@@ -12,6 +12,11 @@ from airbyte_cdk.models import Type as MessageType
 
 
 class SliceLogger(ABC):
+    """
+    SliceLogger is an interface that allows us to log slices of data in a uniform way.
+    It is responsible for determining whether or not a slice should be logged and for creating the log message.
+    """
+
     SLICE_LOG_PREFIX = "slice:"
 
     def create_slice_log_message(self, _slice: Optional[Mapping[str, Any]]) -> AirbyteMessage: