airbyte-cdk 6.37.2.dev1__py3-none-any.whl → 6.37.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +7 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -5
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -3
- airbyte_cdk/sources/declarative/interpolation/macros.py +2 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +34 -8
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +119 -7
- airbyte_cdk/sources/declarative/requesters/README.md +5 -5
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +18 -13
- airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
- airbyte_cdk/sources/types.py +1 -0
- airbyte_cdk/utils/mapping_helpers.py +18 -1
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/METADATA +4 -4
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/RECORD +29 -33
- airbyte_cdk/sources/embedded/__init__.py +0 -3
- airbyte_cdk/sources/embedded/base_integration.py +0 -61
- airbyte_cdk/sources/embedded/catalog.py +0 -57
- airbyte_cdk/sources/embedded/runner.py +0 -57
- airbyte_cdk/sources/embedded/tools.py +0 -27
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.37.3.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,8 @@
|
|
1
1
|
# AsyncHttpJobRepository sequence diagram
|
2
2
|
|
3
3
|
- Components marked as optional are not required and can be ignored.
|
4
|
-
- if `
|
5
|
-
- interpolation_context, e.g. `
|
4
|
+
- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
|
5
|
+
- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice
|
6
6
|
|
7
7
|
```mermaid
|
8
8
|
---
|
@@ -12,7 +12,7 @@ sequenceDiagram
|
|
12
12
|
participant AsyncHttpJobRepository as AsyncOrchestrator
|
13
13
|
participant CreationRequester as creation_requester
|
14
14
|
participant PollingRequester as polling_requester
|
15
|
-
participant UrlRequester as
|
15
|
+
participant UrlRequester as download_target_requester (Optional)
|
16
16
|
participant DownloadRetriever as download_retriever
|
17
17
|
participant AbortRequester as abort_requester (Optional)
|
18
18
|
participant DeleteRequester as delete_requester (Optional)
|
@@ -25,14 +25,14 @@ sequenceDiagram
|
|
25
25
|
|
26
26
|
loop Poll for job status
|
27
27
|
AsyncHttpJobRepository ->> PollingRequester: Check job status
|
28
|
-
PollingRequester ->> Reporting Server: Status request (interpolation_context: `
|
28
|
+
PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
|
29
29
|
Reporting Server -->> PollingRequester: Status response
|
30
30
|
PollingRequester -->> AsyncHttpJobRepository: Job status
|
31
31
|
end
|
32
32
|
|
33
33
|
alt Status: Ready
|
34
34
|
AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
|
35
|
-
UrlRequester ->> Reporting Server: URL request (interpolation_context: `
|
35
|
+
UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
|
36
36
|
Reporting Server -->> UrlRequester: Download URLs
|
37
37
|
UrlRequester -->> AsyncHttpJobRepository: Download URLs
|
38
38
|
|
@@ -43,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
43
43
|
delete_requester: Optional[Requester]
|
44
44
|
status_extractor: DpathExtractor
|
45
45
|
status_mapping: Mapping[str, AsyncJobStatus]
|
46
|
-
|
46
|
+
download_target_extractor: DpathExtractor
|
47
47
|
|
48
48
|
job_timeout: Optional[timedelta] = None
|
49
49
|
record_extractor: RecordExtractor = field(
|
50
50
|
init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
|
51
51
|
)
|
52
|
-
|
52
|
+
download_target_requester: Optional[Requester] = (
|
53
53
|
None # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
|
54
54
|
)
|
55
55
|
|
@@ -211,12 +211,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
211
211
|
|
212
212
|
"""
|
213
213
|
|
214
|
-
for
|
214
|
+
for target_url in self._get_download_targets(job):
|
215
215
|
job_slice = job.job_parameters()
|
216
216
|
stream_slice = StreamSlice(
|
217
217
|
partition=job_slice.partition,
|
218
218
|
cursor_slice=job_slice.cursor_slice,
|
219
|
-
extra_fields={
|
219
|
+
extra_fields={
|
220
|
+
**job_slice.extra_fields,
|
221
|
+
"download_target": target_url,
|
222
|
+
},
|
220
223
|
)
|
221
224
|
for message in self.download_retriever.read_records({}, stream_slice):
|
222
225
|
if isinstance(message, Record):
|
@@ -269,27 +272,29 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
269
272
|
del self._polling_job_response_by_id[job_id]
|
270
273
|
|
271
274
|
def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
|
275
|
+
creation_response = self._create_job_response_by_id[job.api_job_id()].json()
|
272
276
|
stream_slice = StreamSlice(
|
273
|
-
partition={
|
277
|
+
partition={},
|
274
278
|
cursor_slice={},
|
279
|
+
extra_fields={"creation_response": creation_response},
|
275
280
|
)
|
276
281
|
return stream_slice
|
277
282
|
|
278
|
-
def
|
279
|
-
if not self.
|
283
|
+
def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
|
284
|
+
if not self.download_target_requester:
|
280
285
|
url_response = self._polling_job_response_by_id[job.api_job_id()]
|
281
286
|
else:
|
287
|
+
polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
|
282
288
|
stream_slice: StreamSlice = StreamSlice(
|
283
|
-
partition={
|
284
|
-
"polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
|
285
|
-
},
|
289
|
+
partition={},
|
286
290
|
cursor_slice={},
|
291
|
+
extra_fields={"polling_response": polling_response},
|
287
292
|
)
|
288
|
-
url_response = self.
|
293
|
+
url_response = self.download_target_requester.send_request(stream_slice=stream_slice) # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
|
289
294
|
if not url_response:
|
290
295
|
raise AirbyteTracedException(
|
291
|
-
internal_message="Always expect a response or an exception from
|
296
|
+
internal_message="Always expect a response or an exception from download_target_requester",
|
292
297
|
failure_type=FailureType.system_error,
|
293
298
|
)
|
294
299
|
|
295
|
-
yield from self.
|
300
|
+
yield from self.download_target_extractor.extract_records(url_response) # type: ignore # we expect download_target_extractor to always return list of strings
|
@@ -25,8 +25,8 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
|
25
25
|
from airbyte_cdk.sources.streams.call_rate import APIBudget
|
26
26
|
from airbyte_cdk.sources.streams.http import HttpClient
|
27
27
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
|
28
|
-
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
29
|
-
from airbyte_cdk.utils.mapping_helpers import combine_mappings
|
28
|
+
from airbyte_cdk.sources.types import Config, EmptyString, StreamSlice, StreamState
|
29
|
+
from airbyte_cdk.utils.mapping_helpers import combine_mappings, get_interpolation_context
|
30
30
|
|
31
31
|
|
32
32
|
@dataclass
|
@@ -49,9 +49,10 @@ class HttpRequester(Requester):
|
|
49
49
|
|
50
50
|
name: str
|
51
51
|
url_base: Union[InterpolatedString, str]
|
52
|
-
path: Union[InterpolatedString, str]
|
53
52
|
config: Config
|
54
53
|
parameters: InitVar[Mapping[str, Any]]
|
54
|
+
|
55
|
+
path: Optional[Union[InterpolatedString, str]] = None
|
55
56
|
authenticator: Optional[DeclarativeAuthenticator] = None
|
56
57
|
http_method: Union[str, HttpMethod] = HttpMethod.GET
|
57
58
|
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
|
@@ -66,7 +67,9 @@ class HttpRequester(Requester):
|
|
66
67
|
|
67
68
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
68
69
|
self._url_base = InterpolatedString.create(self.url_base, parameters=parameters)
|
69
|
-
self._path = InterpolatedString.create(
|
70
|
+
self._path = InterpolatedString.create(
|
71
|
+
self.path if self.path else EmptyString, parameters=parameters
|
72
|
+
)
|
70
73
|
if self.request_options_provider is None:
|
71
74
|
self._request_options_provider = InterpolatedRequestOptionsProvider(
|
72
75
|
config=self.config, parameters=parameters
|
@@ -85,7 +88,7 @@ class HttpRequester(Requester):
|
|
85
88
|
self._parameters = parameters
|
86
89
|
|
87
90
|
if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
|
88
|
-
backoff_strategies = self.error_handler.backoff_strategies
|
91
|
+
backoff_strategies = self.error_handler.backoff_strategies # type: ignore
|
89
92
|
else:
|
90
93
|
backoff_strategies = None
|
91
94
|
|
@@ -112,21 +115,33 @@ class HttpRequester(Requester):
|
|
112
115
|
def get_authenticator(self) -> DeclarativeAuthenticator:
|
113
116
|
return self._authenticator
|
114
117
|
|
115
|
-
def get_url_base(
|
116
|
-
|
118
|
+
def get_url_base(
|
119
|
+
self,
|
120
|
+
*,
|
121
|
+
stream_state: Optional[StreamState] = None,
|
122
|
+
stream_slice: Optional[StreamSlice] = None,
|
123
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
124
|
+
) -> str:
|
125
|
+
interpolation_context = get_interpolation_context(
|
126
|
+
stream_state=stream_state,
|
127
|
+
stream_slice=stream_slice,
|
128
|
+
next_page_token=next_page_token,
|
129
|
+
)
|
130
|
+
return os.path.join(self._url_base.eval(self.config, **interpolation_context), EmptyString)
|
117
131
|
|
118
132
|
def get_path(
|
119
133
|
self,
|
120
134
|
*,
|
121
|
-
stream_state: Optional[StreamState],
|
122
|
-
stream_slice: Optional[StreamSlice],
|
123
|
-
next_page_token: Optional[Mapping[str, Any]],
|
135
|
+
stream_state: Optional[StreamState] = None,
|
136
|
+
stream_slice: Optional[StreamSlice] = None,
|
137
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
124
138
|
) -> str:
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
139
|
+
interpolation_context = get_interpolation_context(
|
140
|
+
stream_state=stream_state,
|
141
|
+
stream_slice=stream_slice,
|
142
|
+
next_page_token=next_page_token,
|
143
|
+
)
|
144
|
+
path = str(self._path.eval(self.config, **interpolation_context))
|
130
145
|
return path.lstrip("/")
|
131
146
|
|
132
147
|
def get_method(self) -> HttpMethod:
|
@@ -324,7 +339,20 @@ class HttpRequester(Requester):
|
|
324
339
|
|
325
340
|
@classmethod
|
326
341
|
def _join_url(cls, url_base: str, path: str) -> str:
|
327
|
-
|
342
|
+
"""
|
343
|
+
Joins a base URL with a given path and returns the resulting URL with any trailing slash removed.
|
344
|
+
|
345
|
+
This method ensures that there are no duplicate slashes when concatenating the base URL and the path,
|
346
|
+
which is useful when the full URL is provided from an interpolation context.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
url_base (str): The base URL to which the path will be appended.
|
350
|
+
path (str): The path to join with the base URL.
|
351
|
+
|
352
|
+
Returns:
|
353
|
+
str: The concatenated URL with the trailing slash (if any) removed.
|
354
|
+
"""
|
355
|
+
return urljoin(url_base, path).rstrip("/")
|
328
356
|
|
329
357
|
def send_request(
|
330
358
|
self,
|
@@ -341,7 +369,11 @@ class HttpRequester(Requester):
|
|
341
369
|
request, response = self._http_client.send_request(
|
342
370
|
http_method=self.get_method().value,
|
343
371
|
url=self._join_url(
|
344
|
-
self.get_url_base(
|
372
|
+
self.get_url_base(
|
373
|
+
stream_state=stream_state,
|
374
|
+
stream_slice=stream_slice,
|
375
|
+
next_page_token=next_page_token,
|
376
|
+
),
|
345
377
|
path
|
346
378
|
or self.get_path(
|
347
379
|
stream_state=stream_state,
|
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
|
25
25
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
26
26
|
from airbyte_cdk.utils.mapping_helpers import (
|
27
27
|
_validate_component_request_option_paths,
|
28
|
+
get_interpolation_context,
|
28
29
|
)
|
29
30
|
|
30
31
|
|
@@ -150,11 +151,22 @@ class DefaultPaginator(Paginator):
|
|
150
151
|
else:
|
151
152
|
return None
|
152
153
|
|
153
|
-
def path(
|
154
|
+
def path(
|
155
|
+
self,
|
156
|
+
next_page_token: Optional[Mapping[str, Any]],
|
157
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
158
|
+
stream_slice: Optional[StreamSlice] = None,
|
159
|
+
) -> Optional[str]:
|
154
160
|
token = next_page_token.get("next_page_token") if next_page_token else None
|
155
161
|
if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
|
162
|
+
# make additional interpolation context
|
163
|
+
interpolation_context = get_interpolation_context(
|
164
|
+
stream_state=stream_state,
|
165
|
+
stream_slice=stream_slice,
|
166
|
+
next_page_token=next_page_token,
|
167
|
+
)
|
156
168
|
# Replace url base to only return the path
|
157
|
-
return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
|
169
|
+
return str(token).replace(self.url_base.eval(self.config, **interpolation_context), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
|
158
170
|
else:
|
159
171
|
return None
|
160
172
|
|
@@ -258,8 +270,17 @@ class PaginatorTestReadDecorator(Paginator):
|
|
258
270
|
response, last_page_size, last_record, last_page_token_value
|
259
271
|
)
|
260
272
|
|
261
|
-
def path(
|
262
|
-
|
273
|
+
def path(
|
274
|
+
self,
|
275
|
+
next_page_token: Optional[Mapping[str, Any]],
|
276
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
277
|
+
stream_slice: Optional[StreamSlice] = None,
|
278
|
+
) -> Optional[str]:
|
279
|
+
return self._decorated.path(
|
280
|
+
next_page_token=next_page_token,
|
281
|
+
stream_state=stream_state,
|
282
|
+
stream_slice=stream_slice,
|
283
|
+
)
|
263
284
|
|
264
285
|
def get_request_params(
|
265
286
|
self,
|
@@ -19,7 +19,12 @@ class NoPagination(Paginator):
|
|
19
19
|
|
20
20
|
parameters: InitVar[Mapping[str, Any]]
|
21
21
|
|
22
|
-
def path(
|
22
|
+
def path(
|
23
|
+
self,
|
24
|
+
next_page_token: Optional[Mapping[str, Any]],
|
25
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
26
|
+
stream_slice: Optional[StreamSlice] = None,
|
27
|
+
) -> Optional[str]:
|
23
28
|
return None
|
24
29
|
|
25
30
|
def get_request_params(
|
@@ -11,7 +11,7 @@ import requests
|
|
11
11
|
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
12
12
|
RequestOptionsProvider,
|
13
13
|
)
|
14
|
-
from airbyte_cdk.sources.types import Record
|
14
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
15
15
|
|
16
16
|
|
17
17
|
@dataclass
|
@@ -49,7 +49,12 @@ class Paginator(ABC, RequestOptionsProvider):
|
|
49
49
|
pass
|
50
50
|
|
51
51
|
@abstractmethod
|
52
|
-
def path(
|
52
|
+
def path(
|
53
|
+
self,
|
54
|
+
next_page_token: Optional[Mapping[str, Any]],
|
55
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
56
|
+
stream_slice: Optional[StreamSlice] = None,
|
57
|
+
) -> Optional[str]:
|
53
58
|
"""
|
54
59
|
Returns the URL path to hit to fetch the next page of records
|
55
60
|
|
@@ -35,7 +35,13 @@ class Requester(RequestOptionsProvider):
|
|
35
35
|
pass
|
36
36
|
|
37
37
|
@abstractmethod
|
38
|
-
def get_url_base(
|
38
|
+
def get_url_base(
|
39
|
+
self,
|
40
|
+
*,
|
41
|
+
stream_state: Optional[StreamState],
|
42
|
+
stream_slice: Optional[StreamSlice],
|
43
|
+
next_page_token: Optional[Mapping[str, Any]],
|
44
|
+
) -> str:
|
39
45
|
"""
|
40
46
|
:return: URL base for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/"
|
41
47
|
"""
|
@@ -234,13 +234,22 @@ class SimpleRetriever(Retriever):
|
|
234
234
|
raise ValueError("Request body json cannot be a string")
|
235
235
|
return body_json
|
236
236
|
|
237
|
-
def _paginator_path(
|
237
|
+
def _paginator_path(
|
238
|
+
self,
|
239
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
240
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
241
|
+
stream_slice: Optional[StreamSlice] = None,
|
242
|
+
) -> Optional[str]:
|
238
243
|
"""
|
239
244
|
If the paginator points to a path, follow it, else return nothing so the requester is used.
|
240
245
|
:param next_page_token:
|
241
246
|
:return:
|
242
247
|
"""
|
243
|
-
return self._paginator.path(
|
248
|
+
return self._paginator.path(
|
249
|
+
next_page_token=next_page_token,
|
250
|
+
stream_state=stream_state,
|
251
|
+
stream_slice=stream_slice,
|
252
|
+
)
|
244
253
|
|
245
254
|
def _parse_response(
|
246
255
|
self,
|
@@ -299,7 +308,11 @@ class SimpleRetriever(Retriever):
|
|
299
308
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
300
309
|
) -> Optional[requests.Response]:
|
301
310
|
return self.requester.send_request(
|
302
|
-
path=self._paginator_path(
|
311
|
+
path=self._paginator_path(
|
312
|
+
next_page_token=next_page_token,
|
313
|
+
stream_state=stream_state,
|
314
|
+
stream_slice=stream_slice,
|
315
|
+
),
|
303
316
|
stream_state=stream_state,
|
304
317
|
stream_slice=stream_slice,
|
305
318
|
next_page_token=next_page_token,
|
@@ -570,7 +583,11 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
|
|
570
583
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
571
584
|
) -> Optional[requests.Response]:
|
572
585
|
return self.requester.send_request(
|
573
|
-
path=self._paginator_path(
|
586
|
+
path=self._paginator_path(
|
587
|
+
next_page_token=next_page_token,
|
588
|
+
stream_state=stream_state,
|
589
|
+
stream_slice=stream_slice,
|
590
|
+
),
|
574
591
|
stream_state=stream_state,
|
575
592
|
stream_slice=stream_slice,
|
576
593
|
next_page_token=next_page_token,
|
@@ -6,7 +6,7 @@ import re
|
|
6
6
|
from dataclasses import dataclass
|
7
7
|
from typing import Any, Dict, List, Optional
|
8
8
|
|
9
|
-
import
|
9
|
+
import anyascii
|
10
10
|
|
11
11
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
12
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
|
|
48
48
|
return self.tokens_to_snake_case(tokens)
|
49
49
|
|
50
50
|
def normalize_key(self, key: str) -> str:
|
51
|
-
return
|
51
|
+
return str(anyascii.anyascii(key))
|
52
52
|
|
53
53
|
def tokenize_key(self, key: str) -> List[str]:
|
54
54
|
tokens = []
|
@@ -50,7 +50,6 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage
|
|
50
50
|
|
51
51
|
def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
|
52
52
|
extra_args["path_to_yaml"] = self._path_to_yaml
|
53
|
-
self.logger.debug("declarative source created from parsed YAML manifest", extra=extra_args)
|
54
53
|
|
55
54
|
@staticmethod
|
56
55
|
def _parse(connection_definition_str: str) -> ConnectionDefinition:
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from enum import Enum
|
7
|
-
from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
if TYPE_CHECKING:
|
10
10
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
@@ -12,6 +12,7 @@ if TYPE_CHECKING:
|
|
12
12
|
|
13
13
|
class ConcurrencyCompatibleStateType(Enum):
|
14
14
|
date_range = "date-range"
|
15
|
+
integer = "integer"
|
15
16
|
|
16
17
|
|
17
18
|
class AbstractStreamStateConverter(ABC):
|
airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import Any, Callable, MutableMapping, Optional, Tuple
|
6
|
+
|
7
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
9
|
+
AbstractStreamStateConverter,
|
10
|
+
ConcurrencyCompatibleStateType,
|
11
|
+
)
|
12
|
+
|
13
|
+
|
14
|
+
class IncrementingCountStreamStateConverter(AbstractStreamStateConverter):
|
15
|
+
def _from_state_message(self, value: Any) -> Any:
|
16
|
+
return value
|
17
|
+
|
18
|
+
def _to_state_message(self, value: Any) -> Any:
|
19
|
+
return value
|
20
|
+
|
21
|
+
@classmethod
|
22
|
+
def get_end_provider(cls) -> Callable[[], float]:
|
23
|
+
return lambda: float("inf")
|
24
|
+
|
25
|
+
def convert_from_sequential_state(
|
26
|
+
self,
|
27
|
+
cursor_field: "CursorField", # to deprecate as it is only needed for sequential state
|
28
|
+
stream_state: MutableMapping[str, Any],
|
29
|
+
start: Optional[Any],
|
30
|
+
) -> Tuple[Any, MutableMapping[str, Any]]:
|
31
|
+
"""
|
32
|
+
Convert the state message to the format required by the ConcurrentCursor.
|
33
|
+
|
34
|
+
e.g.
|
35
|
+
{
|
36
|
+
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
37
|
+
"metadata": { … },
|
38
|
+
"slices": [
|
39
|
+
{"start": "10", "end": "2021-01-18T21:18:20.000+00:00"},
|
40
|
+
]
|
41
|
+
}
|
42
|
+
"""
|
43
|
+
sync_start = self._get_sync_start(cursor_field, stream_state, start)
|
44
|
+
if self.is_state_message_compatible(stream_state):
|
45
|
+
return sync_start, stream_state
|
46
|
+
|
47
|
+
# Create a slice to represent the records synced during prior syncs.
|
48
|
+
# The start and end are the same to avoid confusion as to whether the records for this slice
|
49
|
+
# were actually synced
|
50
|
+
slices = [
|
51
|
+
{
|
52
|
+
self.START_KEY: start if start is not None else sync_start,
|
53
|
+
self.END_KEY: sync_start, # this may not be relevant anymore
|
54
|
+
self.MOST_RECENT_RECORD_KEY: sync_start,
|
55
|
+
}
|
56
|
+
]
|
57
|
+
|
58
|
+
return sync_start, {
|
59
|
+
"state_type": ConcurrencyCompatibleStateType.integer.value,
|
60
|
+
"slices": slices,
|
61
|
+
"legacy": stream_state,
|
62
|
+
}
|
63
|
+
|
64
|
+
def parse_value(self, value: int) -> int:
|
65
|
+
return value
|
66
|
+
|
67
|
+
@property
|
68
|
+
def zero_value(self) -> int:
|
69
|
+
return 0
|
70
|
+
|
71
|
+
def increment(self, value: int) -> int:
|
72
|
+
return value + 1
|
73
|
+
|
74
|
+
def output_format(self, value: int) -> int:
|
75
|
+
return value
|
76
|
+
|
77
|
+
def _get_sync_start(
|
78
|
+
self,
|
79
|
+
cursor_field: CursorField,
|
80
|
+
stream_state: MutableMapping[str, Any],
|
81
|
+
start: Optional[int],
|
82
|
+
) -> int:
|
83
|
+
sync_start = start if start is not None else self.zero_value
|
84
|
+
prev_sync_low_water_mark: Optional[int] = (
|
85
|
+
stream_state[cursor_field.cursor_field_key]
|
86
|
+
if cursor_field.cursor_field_key in stream_state
|
87
|
+
else None
|
88
|
+
)
|
89
|
+
if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
|
90
|
+
return prev_sync_low_water_mark
|
91
|
+
else:
|
92
|
+
return sync_start
|
@@ -19,9 +19,9 @@ DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution
|
|
19
19
|
error_message="Invalid Protocol Schema: The endpoint that data is being requested from is using an invalid or insecure. Exception: requests.exceptions.InvalidSchema",
|
20
20
|
),
|
21
21
|
InvalidURL: ErrorResolution(
|
22
|
-
response_action=ResponseAction.
|
23
|
-
failure_type=FailureType.
|
24
|
-
error_message="Invalid URL specified: The endpoint that data is being requested from is not a valid URL. Exception: requests.exceptions.InvalidURL",
|
22
|
+
response_action=ResponseAction.RETRY,
|
23
|
+
failure_type=FailureType.transient_error,
|
24
|
+
error_message="Invalid URL specified or DNS error occurred: The endpoint that data is being requested from is not a valid URL. Exception: requests.exceptions.InvalidURL",
|
25
25
|
),
|
26
26
|
RequestException: ErrorResolution(
|
27
27
|
response_action=ResponseAction.RETRY,
|
airbyte_cdk/sources/types.py
CHANGED
@@ -10,7 +10,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
|
|
10
10
|
RequestOption,
|
11
11
|
RequestOptionType,
|
12
12
|
)
|
13
|
-
from airbyte_cdk.sources.types import Config
|
13
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
14
14
|
|
15
15
|
|
16
16
|
def _merge_mappings(
|
@@ -143,3 +143,20 @@ def _validate_component_request_option_paths(
|
|
143
143
|
)
|
144
144
|
except ValueError as error:
|
145
145
|
raise ValueError(error)
|
146
|
+
|
147
|
+
|
148
|
+
def get_interpolation_context(
|
149
|
+
stream_state: Optional[StreamState] = None,
|
150
|
+
stream_slice: Optional[StreamSlice] = None,
|
151
|
+
next_page_token: Optional[Mapping[str, Any]] = None,
|
152
|
+
) -> Mapping[str, Any]:
|
153
|
+
return {
|
154
|
+
"stream_slice": stream_slice,
|
155
|
+
"next_page_token": next_page_token,
|
156
|
+
# update the context with extra fields, if passed.
|
157
|
+
**(
|
158
|
+
stream_slice.extra_fields
|
159
|
+
if stream_slice is not None and hasattr(stream_slice, "extra_fields")
|
160
|
+
else {}
|
161
|
+
),
|
162
|
+
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.37.
|
3
|
+
Version: 6.37.3
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,13 +22,13 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
-
Requires-Dist: Unidecode (>=1.3,<2.0)
|
26
25
|
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
|
+
Requires-Dist: anyascii (>=0.3.2,<0.4.0)
|
27
27
|
Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
|
28
28
|
Requires-Dist: backoff
|
29
29
|
Requires-Dist: cachetools
|
30
30
|
Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
|
31
|
-
Requires-Dist: cryptography (>=
|
31
|
+
Requires-Dist: cryptography (>=44.0.0,<45.0.0)
|
32
32
|
Requires-Dist: dpath (>=2.1.6,<3.0.0)
|
33
33
|
Requires-Dist: dunamai (>=1.22.0,<2.0.0)
|
34
34
|
Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
|
@@ -47,7 +47,7 @@ Requires-Dist: pandas (==2.2.2)
|
|
47
47
|
Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
|
48
48
|
Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
|
49
49
|
Requires-Dist: psutil (==6.1.0)
|
50
|
-
Requires-Dist: pyarrow (>=
|
50
|
+
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "file-based"
|
51
51
|
Requires-Dist: pydantic (>=2.7,<3.0)
|
52
52
|
Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
|
53
53
|
Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
|