airbyte-cdk 6.61.4__py3-none-any.whl → 6.62.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +4 -2
- airbyte_cdk/manifest_server/README.md +17 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +2 -2
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +57 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +196 -269
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +4 -7
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +0 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +1 -23
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +0 -6
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +88 -107
- airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py +95 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +4 -1
- airbyte_cdk/sources/declarative/retrievers/retriever.py +5 -0
- airbyte_cdk/sources/message/repository.py +20 -0
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/METADATA +6 -5
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/RECORD +22 -21
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.61.4.dist-info → airbyte_cdk-6.62.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -149,6 +149,7 @@ class CartesianProductStreamSlicer(PartitionRouter):
|
|
149
149
|
for stream_slice_tuple in product:
|
150
150
|
partition = dict(ChainMap(*[s.partition for s in stream_slice_tuple])) # type: ignore # ChainMap expects a MutableMapping[Never, Never] for reasons
|
151
151
|
cursor_slices = [s.cursor_slice for s in stream_slice_tuple if s.cursor_slice]
|
152
|
+
extra_fields = dict(ChainMap(*[s.extra_fields for s in stream_slice_tuple])) # type: ignore # ChainMap expects a MutableMapping[Never, Never] for reasons
|
152
153
|
if len(cursor_slices) > 1:
|
153
154
|
raise ValueError(
|
154
155
|
f"There should only be a single cursor slice. Found {cursor_slices}"
|
@@ -157,13 +158,9 @@ class CartesianProductStreamSlicer(PartitionRouter):
|
|
157
158
|
cursor_slice = cursor_slices[0]
|
158
159
|
else:
|
159
160
|
cursor_slice = {}
|
160
|
-
yield StreamSlice(
|
161
|
-
|
162
|
-
|
163
|
-
"""
|
164
|
-
Parent stream states are not supported for cartesian product stream slicer
|
165
|
-
"""
|
166
|
-
pass
|
161
|
+
yield StreamSlice(
|
162
|
+
partition=partition, cursor_slice=cursor_slice, extra_fields=extra_fields
|
163
|
+
)
|
167
164
|
|
168
165
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
169
166
|
"""
|
@@ -140,11 +140,6 @@ class GroupingPartitionRouter(PartitionRouter):
|
|
140
140
|
) -> Mapping[str, Any]:
|
141
141
|
return {}
|
142
142
|
|
143
|
-
def set_initial_state(self, stream_state: StreamState) -> None:
|
144
|
-
"""Delegate state initialization to the underlying partition router."""
|
145
|
-
self.underlying_partition_router.set_initial_state(stream_state)
|
146
|
-
self._state = self.underlying_partition_router.get_stream_state()
|
147
|
-
|
148
143
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
149
144
|
"""Delegate state retrieval to the underlying partition router."""
|
150
145
|
return self._state
|
@@ -108,12 +108,6 @@ class ListPartitionRouter(PartitionRouter):
|
|
108
108
|
else:
|
109
109
|
return {}
|
110
110
|
|
111
|
-
def set_initial_state(self, stream_state: StreamState) -> None:
|
112
|
-
"""
|
113
|
-
ListPartitionRouter doesn't have parent streams
|
114
|
-
"""
|
115
|
-
pass
|
116
|
-
|
117
111
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
118
112
|
"""
|
119
113
|
ListPartitionRouter doesn't have parent streams
|
@@ -15,31 +15,9 @@ class PartitionRouter(StreamSlicer):
|
|
15
15
|
"""
|
16
16
|
Base class for partition routers.
|
17
17
|
Methods:
|
18
|
-
|
19
|
-
get_parent_state(): Get the state of the parent streams.
|
18
|
+
get_stream_state(): Get the state of the parent streams.
|
20
19
|
"""
|
21
20
|
|
22
|
-
@abstractmethod
|
23
|
-
def set_initial_state(self, stream_state: StreamState) -> None:
|
24
|
-
"""
|
25
|
-
Set the state of the parent streams.
|
26
|
-
|
27
|
-
This method should only be implemented if the slicer is based on some parent stream and needs to read this stream
|
28
|
-
incrementally using the state.
|
29
|
-
|
30
|
-
Args:
|
31
|
-
stream_state (StreamState): The state of the streams to be set. The expected format is a dictionary that includes
|
32
|
-
'parent_state' which is a dictionary of parent state names to their corresponding state.
|
33
|
-
Example:
|
34
|
-
{
|
35
|
-
"parent_state": {
|
36
|
-
"parent_stream_name_1": { ... },
|
37
|
-
"parent_stream_name_2": { ... },
|
38
|
-
...
|
39
|
-
}
|
40
|
-
}
|
41
|
-
"""
|
42
|
-
|
43
21
|
@abstractmethod
|
44
22
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
45
23
|
"""
|
@@ -50,12 +50,6 @@ class SinglePartitionRouter(PartitionRouter):
|
|
50
50
|
def stream_slices(self) -> Iterable[StreamSlice]:
|
51
51
|
yield StreamSlice(partition={}, cursor_slice={})
|
52
52
|
|
53
|
-
def set_initial_state(self, stream_state: StreamState) -> None:
|
54
|
-
"""
|
55
|
-
SinglePartitionRouter doesn't have parent streams
|
56
|
-
"""
|
57
|
-
pass
|
58
|
-
|
59
53
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
60
54
|
"""
|
61
55
|
SinglePartitionRouter doesn't have parent streams
|
@@ -7,24 +7,51 @@ import copy
|
|
7
7
|
import json
|
8
8
|
import logging
|
9
9
|
from dataclasses import InitVar, dataclass
|
10
|
-
from typing import
|
10
|
+
from typing import (
|
11
|
+
TYPE_CHECKING,
|
12
|
+
Any,
|
13
|
+
Iterable,
|
14
|
+
List,
|
15
|
+
Mapping,
|
16
|
+
MutableMapping,
|
17
|
+
Optional,
|
18
|
+
TypeVar,
|
19
|
+
Union,
|
20
|
+
)
|
11
21
|
|
12
22
|
import dpath
|
13
23
|
import requests
|
14
24
|
|
15
25
|
from airbyte_cdk.models import AirbyteMessage
|
16
|
-
from airbyte_cdk.models import Type as MessageType
|
17
26
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
18
27
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
19
28
|
from airbyte_cdk.sources.declarative.requesters.request_option import (
|
20
29
|
RequestOption,
|
21
30
|
RequestOptionType,
|
22
31
|
)
|
23
|
-
from airbyte_cdk.sources.
|
24
|
-
from airbyte_cdk.
|
32
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
33
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
25
34
|
|
26
35
|
if TYPE_CHECKING:
|
27
|
-
from airbyte_cdk.sources.
|
36
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
37
|
+
|
38
|
+
|
39
|
+
T = TypeVar("T")
|
40
|
+
|
41
|
+
|
42
|
+
def iterate_with_last_flag(generator: Iterable[T]) -> Iterable[tuple[T, bool]]:
|
43
|
+
iterator = iter(generator)
|
44
|
+
|
45
|
+
try:
|
46
|
+
current = next(iterator)
|
47
|
+
except StopIteration:
|
48
|
+
return # Return an empty iterator
|
49
|
+
|
50
|
+
for next_item in iterator:
|
51
|
+
yield current, False
|
52
|
+
current = next_item
|
53
|
+
|
54
|
+
yield current, True
|
28
55
|
|
29
56
|
|
30
57
|
@dataclass
|
@@ -40,7 +67,7 @@ class ParentStreamConfig:
|
|
40
67
|
incremental_dependency (bool): Indicates if the parent stream should be read incrementally.
|
41
68
|
"""
|
42
69
|
|
43
|
-
stream: "
|
70
|
+
stream: "AbstractStream"
|
44
71
|
parent_key: Union[InterpolatedString, str]
|
45
72
|
partition_field: Union[InterpolatedString, str]
|
46
73
|
config: Config
|
@@ -176,59 +203,65 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
176
203
|
for field_path in parent_stream_config.extra_fields
|
177
204
|
]
|
178
205
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
206
|
+
for partition, is_last_slice in iterate_with_last_flag(
|
207
|
+
parent_stream.generate_partitions()
|
208
|
+
):
|
209
|
+
for parent_record, is_last_record_in_slice in iterate_with_last_flag(
|
210
|
+
partition.read()
|
211
|
+
):
|
212
|
+
# In the previous CDK implementation, state management was done internally by the stream.
|
213
|
+
# However, this could cause issues when doing availability check for example as the availability
|
214
|
+
# check would progress the state so state management was moved outside of the read method.
|
215
|
+
# Hence, we need to call the cursor here.
|
216
|
+
# Note that we call observe and close_partition before emitting the associated record as the
|
217
|
+
# ConcurrentPerPartitionCursor will associate a record with the state of the stream after the
|
218
|
+
# record was consumed.
|
219
|
+
parent_stream.cursor.observe(parent_record)
|
193
220
|
parent_partition = (
|
194
221
|
parent_record.associated_slice.partition
|
195
222
|
if parent_record.associated_slice
|
196
223
|
else {}
|
197
224
|
)
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
225
|
+
record_data = parent_record.data
|
226
|
+
|
227
|
+
try:
|
228
|
+
partition_value = dpath.get(
|
229
|
+
record_data, # type: ignore [arg-type]
|
230
|
+
parent_field,
|
231
|
+
)
|
232
|
+
except KeyError:
|
233
|
+
# FIXME a log here would go a long way for debugging
|
234
|
+
continue
|
235
|
+
|
236
|
+
# Add extra fields
|
237
|
+
extracted_extra_fields = self._extract_extra_fields(
|
238
|
+
record_data, extra_fields
|
203
239
|
)
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
240
|
+
|
241
|
+
if parent_stream_config.lazy_read_pointer:
|
242
|
+
extracted_extra_fields = {
|
243
|
+
"child_response": self._extract_child_response(
|
244
|
+
record_data,
|
245
|
+
parent_stream_config.lazy_read_pointer, # type: ignore[arg-type] # lazy_read_pointer type handeled in __post_init__ of parent_stream_config
|
246
|
+
),
|
247
|
+
**extracted_extra_fields,
|
248
|
+
}
|
249
|
+
|
250
|
+
if is_last_record_in_slice:
|
251
|
+
parent_stream.cursor.close_partition(partition)
|
252
|
+
if is_last_slice:
|
253
|
+
parent_stream.cursor.ensure_at_least_one_state_emitted()
|
254
|
+
|
255
|
+
yield StreamSlice(
|
256
|
+
partition={
|
257
|
+
partition_field: partition_value,
|
258
|
+
"parent_slice": parent_partition or {},
|
259
|
+
},
|
260
|
+
cursor_slice={},
|
261
|
+
extra_fields=extracted_extra_fields,
|
208
262
|
)
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
# Add extra fields
|
213
|
-
extracted_extra_fields = self._extract_extra_fields(parent_record, extra_fields)
|
214
|
-
|
215
|
-
if parent_stream_config.lazy_read_pointer:
|
216
|
-
extracted_extra_fields = {
|
217
|
-
"child_response": self._extract_child_response(
|
218
|
-
parent_record,
|
219
|
-
parent_stream_config.lazy_read_pointer, # type: ignore[arg-type] # lazy_read_pointer type handeled in __post_init__ of parent_stream_config
|
220
|
-
),
|
221
|
-
**extracted_extra_fields,
|
222
|
-
}
|
223
|
-
|
224
|
-
yield StreamSlice(
|
225
|
-
partition={
|
226
|
-
partition_field: partition_value,
|
227
|
-
"parent_slice": parent_partition or {},
|
228
|
-
},
|
229
|
-
cursor_slice={},
|
230
|
-
extra_fields=extracted_extra_fields,
|
231
|
-
)
|
263
|
+
|
264
|
+
yield from []
|
232
265
|
|
233
266
|
def _extract_child_response(
|
234
267
|
self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: List[InterpolatedString]
|
@@ -278,60 +311,6 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
278
311
|
extracted_extra_fields[".".join(extra_field_path)] = extra_field_value
|
279
312
|
return extracted_extra_fields
|
280
313
|
|
281
|
-
def set_initial_state(self, stream_state: StreamState) -> None:
|
282
|
-
"""
|
283
|
-
Set the state of the parent streams.
|
284
|
-
|
285
|
-
If the `parent_state` key is missing from `stream_state`, migrate the child stream state to the parent stream's state format.
|
286
|
-
This migration applies only to parent streams with incremental dependencies.
|
287
|
-
|
288
|
-
Args:
|
289
|
-
stream_state (StreamState): The state of the streams to be set.
|
290
|
-
|
291
|
-
Example of state format:
|
292
|
-
{
|
293
|
-
"parent_state": {
|
294
|
-
"parent_stream_name1": {
|
295
|
-
"last_updated": "2023-05-27T00:00:00Z"
|
296
|
-
},
|
297
|
-
"parent_stream_name2": {
|
298
|
-
"last_updated": "2023-05-27T00:00:00Z"
|
299
|
-
}
|
300
|
-
}
|
301
|
-
}
|
302
|
-
|
303
|
-
Example of migrating to parent state format:
|
304
|
-
- Initial state:
|
305
|
-
{
|
306
|
-
"updated_at": "2023-05-27T00:00:00Z"
|
307
|
-
}
|
308
|
-
- After migration:
|
309
|
-
{
|
310
|
-
"updated_at": "2023-05-27T00:00:00Z",
|
311
|
-
"parent_state": {
|
312
|
-
"parent_stream_name": {
|
313
|
-
"parent_stream_cursor": "2023-05-27T00:00:00Z"
|
314
|
-
}
|
315
|
-
}
|
316
|
-
}
|
317
|
-
"""
|
318
|
-
if not stream_state:
|
319
|
-
return
|
320
|
-
|
321
|
-
parent_state = stream_state.get("parent_state", {})
|
322
|
-
|
323
|
-
# Set state for each parent stream with an incremental dependency
|
324
|
-
for parent_config in self.parent_stream_configs:
|
325
|
-
if (
|
326
|
-
not parent_state.get(parent_config.stream.name, {})
|
327
|
-
and parent_config.incremental_dependency
|
328
|
-
):
|
329
|
-
# Migrate child state to parent state format
|
330
|
-
parent_state = self._migrate_child_state_to_parent_state(stream_state)
|
331
|
-
|
332
|
-
if parent_config.incremental_dependency:
|
333
|
-
parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
|
334
|
-
|
335
314
|
def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
|
336
315
|
"""
|
337
316
|
Migrate the child or global stream state into the parent stream's state format.
|
@@ -414,7 +393,9 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
414
393
|
parent_state = {}
|
415
394
|
for parent_config in self.parent_stream_configs:
|
416
395
|
if parent_config.incremental_dependency:
|
417
|
-
parent_state[parent_config.stream.name] = copy.deepcopy(
|
396
|
+
parent_state[parent_config.stream.name] = copy.deepcopy(
|
397
|
+
parent_config.stream.cursor.state
|
398
|
+
)
|
418
399
|
return parent_state
|
419
400
|
|
420
401
|
@property
|
airbyte_cdk/sources/declarative/requesters/request_options/per_partition_request_option_provider.py
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
from typing import Any, Mapping, Optional, Union
|
2
|
+
|
3
|
+
from airbyte_cdk.sources.declarative.partition_routers import PartitionRouter
|
4
|
+
from airbyte_cdk.sources.declarative.requesters.request_options import RequestOptionsProvider
|
5
|
+
from airbyte_cdk.sources.types import StreamSlice, StreamState
|
6
|
+
|
7
|
+
|
8
|
+
class PerPartitionRequestOptionsProvider(RequestOptionsProvider):
|
9
|
+
def __init__(self, partition_router: PartitionRouter, cursor_provider: RequestOptionsProvider):
|
10
|
+
self._partition_router = partition_router
|
11
|
+
self._cursor_provider = cursor_provider
|
12
|
+
|
13
|
+
def get_request_params(
|
14
|
+
self,
|
15
|
+
*,
|
16
|
+
stream_state: Optional[StreamState] = None,
|
17
|
+
stream_slice: Optional[StreamSlice] = None,
|
18
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
19
|
+
) -> Mapping[str, Any]:
|
20
|
+
return self._partition_router.get_request_params( # type: ignore # this always returns a mapping
|
21
|
+
stream_state=stream_state,
|
22
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
|
23
|
+
if stream_slice
|
24
|
+
else StreamSlice(partition={}, cursor_slice={}),
|
25
|
+
next_page_token=next_page_token,
|
26
|
+
) | self._cursor_provider.get_request_params(
|
27
|
+
stream_state=stream_state,
|
28
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
|
29
|
+
if stream_slice
|
30
|
+
else StreamSlice(partition={}, cursor_slice={}),
|
31
|
+
next_page_token=next_page_token,
|
32
|
+
)
|
33
|
+
|
34
|
+
def get_request_headers(
|
35
|
+
self,
|
36
|
+
*,
|
37
|
+
stream_state: Optional[StreamState] = None,
|
38
|
+
stream_slice: Optional[StreamSlice] = None,
|
39
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
40
|
+
) -> Mapping[str, Any]:
|
41
|
+
return self._partition_router.get_request_headers( # type: ignore # this always returns a mapping
|
42
|
+
stream_state=stream_state,
|
43
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
|
44
|
+
if stream_slice
|
45
|
+
else stream_slice,
|
46
|
+
next_page_token=next_page_token,
|
47
|
+
) | self._cursor_provider.get_request_headers(
|
48
|
+
stream_state=stream_state,
|
49
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
|
50
|
+
if stream_slice
|
51
|
+
else stream_slice,
|
52
|
+
next_page_token=next_page_token,
|
53
|
+
)
|
54
|
+
|
55
|
+
def get_request_body_data(
|
56
|
+
self,
|
57
|
+
*,
|
58
|
+
stream_state: Optional[StreamState] = None,
|
59
|
+
stream_slice: Optional[StreamSlice] = None,
|
60
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
61
|
+
) -> Union[Mapping[str, Any], str]:
|
62
|
+
return self._partition_router.get_request_body_data( # type: ignore # this always returns a mapping
|
63
|
+
stream_state=stream_state,
|
64
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
|
65
|
+
if stream_slice
|
66
|
+
else stream_slice,
|
67
|
+
next_page_token=next_page_token,
|
68
|
+
) | self._cursor_provider.get_request_body_data(
|
69
|
+
stream_state=stream_state,
|
70
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
|
71
|
+
if stream_slice
|
72
|
+
else stream_slice,
|
73
|
+
next_page_token=next_page_token,
|
74
|
+
)
|
75
|
+
|
76
|
+
def get_request_body_json(
|
77
|
+
self,
|
78
|
+
*,
|
79
|
+
stream_state: Optional[StreamState] = None,
|
80
|
+
stream_slice: Optional[StreamSlice] = None,
|
81
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
82
|
+
) -> Mapping[str, Any]:
|
83
|
+
return self._partition_router.get_request_body_json( # type: ignore # this always returns a mapping
|
84
|
+
stream_state=stream_state,
|
85
|
+
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={})
|
86
|
+
if stream_slice
|
87
|
+
else stream_slice,
|
88
|
+
next_page_token=next_page_token,
|
89
|
+
) | self._cursor_provider.get_request_body_json(
|
90
|
+
stream_state=stream_state,
|
91
|
+
stream_slice=StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice)
|
92
|
+
if stream_slice
|
93
|
+
else stream_slice,
|
94
|
+
next_page_token=next_page_token,
|
95
|
+
)
|
@@ -17,6 +17,7 @@ from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
|
|
17
17
|
)
|
18
18
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
19
19
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
20
21
|
from airbyte_cdk.sources.types import Config
|
21
22
|
|
22
23
|
|
@@ -28,12 +29,14 @@ class HttpComponentsResolver(ComponentsResolver):
|
|
28
29
|
|
29
30
|
Attributes:
|
30
31
|
retriever (Retriever): The retriever used to fetch data from an API.
|
32
|
+
stream_slicer (StreamSlicer): The how the data is sliced.
|
31
33
|
config (Config): Configuration object for the resolver.
|
32
34
|
components_mapping (List[ComponentMappingDefinition]): List of mappings to resolve.
|
33
35
|
parameters (InitVar[Mapping[str, Any]]): Additional parameters for interpolation.
|
34
36
|
"""
|
35
37
|
|
36
38
|
retriever: Retriever
|
39
|
+
stream_slicer: StreamSlicer
|
37
40
|
config: Config
|
38
41
|
components_mapping: List[ComponentMappingDefinition]
|
39
42
|
parameters: InitVar[Mapping[str, Any]]
|
@@ -88,7 +91,7 @@ class HttpComponentsResolver(ComponentsResolver):
|
|
88
91
|
"""
|
89
92
|
kwargs = {"stream_template_config": stream_template_config}
|
90
93
|
|
91
|
-
for stream_slice in self.
|
94
|
+
for stream_slice in self.stream_slicer.stream_slices():
|
92
95
|
for components_values in self.retriever.read_records(
|
93
96
|
records_schema={}, stream_slice=stream_slice
|
94
97
|
):
|
@@ -5,6 +5,8 @@
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
+
from typing_extensions import deprecated
|
9
|
+
|
8
10
|
from airbyte_cdk.sources.streams.core import StreamData
|
9
11
|
from airbyte_cdk.sources.types import StreamSlice, StreamState
|
10
12
|
|
@@ -29,11 +31,13 @@ class Retriever:
|
|
29
31
|
"""
|
30
32
|
|
31
33
|
@abstractmethod
|
34
|
+
@deprecated("Stream slicing is being moved to the stream level.")
|
32
35
|
def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
|
33
36
|
"""Returns the stream slices"""
|
34
37
|
|
35
38
|
@property
|
36
39
|
@abstractmethod
|
40
|
+
@deprecated("State management is being moved to the stream level.")
|
37
41
|
def state(self) -> StreamState:
|
38
42
|
"""State getter, should return state in form that can serialized to a string and send to the output
|
39
43
|
as a STATE AirbyteMessage.
|
@@ -49,5 +53,6 @@ class Retriever:
|
|
49
53
|
|
50
54
|
@state.setter
|
51
55
|
@abstractmethod
|
56
|
+
@deprecated("State management is being moved to the stream level.")
|
52
57
|
def state(self, value: StreamState) -> None:
|
53
58
|
"""State setter, accept state serialized by state getter."""
|
@@ -95,6 +95,26 @@ class InMemoryMessageRepository(MessageRepository):
|
|
95
95
|
yield self._message_queue.popleft()
|
96
96
|
|
97
97
|
|
98
|
+
class StateFilteringMessageRepository(MessageRepository):
|
99
|
+
"""
|
100
|
+
This message repository is used when creating parent streams for SubstreamPartitionRouter. As the child stream
|
101
|
+
manages the state for both the child and the parents, we want to prevent parents from emitting state messages.
|
102
|
+
"""
|
103
|
+
|
104
|
+
def __init__(self, decorated: MessageRepository) -> None:
|
105
|
+
self._decorated = decorated
|
106
|
+
|
107
|
+
def emit_message(self, message: AirbyteMessage) -> None:
|
108
|
+
if message.type != Type.STATE:
|
109
|
+
self._decorated.emit_message(message)
|
110
|
+
|
111
|
+
def log_message(self, level: Level, message_provider: Callable[[], LogMessage]) -> None:
|
112
|
+
self._decorated.log_message(level, message_provider)
|
113
|
+
|
114
|
+
def consume_queue(self) -> Iterable[AirbyteMessage]:
|
115
|
+
yield from self._decorated.consume_queue()
|
116
|
+
|
117
|
+
|
98
118
|
class LogAppenderMessageRepositoryDecorator(MessageRepository):
|
99
119
|
def __init__(
|
100
120
|
self,
|
@@ -1,13 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.
|
3
|
+
Version: 6.62.0.dev0
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
7
7
|
Keywords: airbyte,connector-development-kit,cdk
|
8
8
|
Author: Airbyte
|
9
9
|
Author-email: contact@airbyte.io
|
10
|
-
Requires-Python: >=3.10,<3.
|
10
|
+
Requires-Python: >=3.10,<3.14
|
11
11
|
Classifier: Development Status :: 3 - Alpha
|
12
12
|
Classifier: Intended Audience :: Developers
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
18
19
|
Classifier: Topic :: Scientific/Engineering
|
19
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
21
|
Provides-Extra: dev
|
@@ -31,13 +32,13 @@ Requires-Dist: backoff
|
|
31
32
|
Requires-Dist: boltons (>=25.0.0,<26.0.0)
|
32
33
|
Requires-Dist: cachetools
|
33
34
|
Requires-Dist: click (>=8.1.8,<9.0.0)
|
34
|
-
Requires-Dist: cohere (
|
35
|
+
Requires-Dist: cohere (>=4.21,<6.0.0) ; extra == "vector-db-based"
|
35
36
|
Requires-Dist: cryptography (>=44.0.0,<45.0.0)
|
36
37
|
Requires-Dist: dateparser (>=1.2.2,<2.0.0)
|
37
38
|
Requires-Dist: dpath (>=2.1.6,<3.0.0)
|
38
39
|
Requires-Dist: dunamai (>=1.22.0,<2.0.0)
|
39
40
|
Requires-Dist: fastapi (>=0.116.1) ; extra == "manifest-server"
|
40
|
-
Requires-Dist: fastavro (>=1.
|
41
|
+
Requires-Dist: fastavro (>=1.11.0,<2.0.0) ; extra == "file-based"
|
41
42
|
Requires-Dist: genson (==1.3.0)
|
42
43
|
Requires-Dist: google-cloud-secret-manager (>=2.17.0,<3.0.0)
|
43
44
|
Requires-Dist: isodate (>=0.6.1,<0.7.0)
|
@@ -51,7 +52,7 @@ Requires-Dist: numpy (<2)
|
|
51
52
|
Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
|
52
53
|
Requires-Dist: orjson (>=3.10.7,<4.0.0)
|
53
54
|
Requires-Dist: packaging
|
54
|
-
Requires-Dist: pandas (==2.2.
|
55
|
+
Requires-Dist: pandas (==2.2.3)
|
55
56
|
Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
|
56
57
|
Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
|
57
58
|
Requires-Dist: psutil (==6.1.0)
|