airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
- airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/sources/utils/transform.py +23 -2
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/mapping_helpers.py +27 -86
- airbyte_cdk/utils/slice_hasher.py +8 -1
- airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -4,11 +4,11 @@
|
|
4
4
|
|
5
5
|
import logging
|
6
6
|
from abc import abstractmethod
|
7
|
+
from datetime import timedelta
|
7
8
|
from json import JSONDecodeError
|
8
9
|
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
|
9
10
|
|
10
11
|
import backoff
|
11
|
-
import pendulum
|
12
12
|
import requests
|
13
13
|
from requests.auth import AuthBase
|
14
14
|
|
@@ -17,6 +17,7 @@ from airbyte_cdk.sources.http_logger import format_http_message
|
|
17
17
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
18
18
|
from airbyte_cdk.utils import AirbyteTracedException
|
19
19
|
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
|
20
|
+
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
20
21
|
|
21
22
|
from ..exceptions import DefaultBackoffException
|
22
23
|
|
@@ -24,6 +25,13 @@ logger = logging.getLogger("airbyte")
|
|
24
25
|
_NOOP_MESSAGE_REPOSITORY = NoopMessageRepository()
|
25
26
|
|
26
27
|
|
28
|
+
class ResponseKeysMaxRecurtionReached(AirbyteTracedException):
|
29
|
+
"""
|
30
|
+
Raised when the max level of recursion is reached, when trying to
|
31
|
+
find-and-get the target key, during the `_make_handled_request`
|
32
|
+
"""
|
33
|
+
|
34
|
+
|
27
35
|
class AbstractOauth2Authenticator(AuthBase):
|
28
36
|
"""
|
29
37
|
Abstract class for OAuth authenticators that implements the OAuth token refresh flow. The authenticator
|
@@ -52,15 +60,31 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
52
60
|
request.headers.update(self.get_auth_header())
|
53
61
|
return request
|
54
62
|
|
63
|
+
@property
|
64
|
+
def _is_access_token_flow(self) -> bool:
|
65
|
+
return self.get_token_refresh_endpoint() is None and self.access_token is not None
|
66
|
+
|
67
|
+
@property
|
68
|
+
def token_expiry_is_time_of_expiration(self) -> bool:
|
69
|
+
"""
|
70
|
+
Indicates that the Token Expiry returns the date until which the token will be valid, not the amount of time it will be valid.
|
71
|
+
"""
|
72
|
+
|
73
|
+
return False
|
74
|
+
|
75
|
+
@property
|
76
|
+
def token_expiry_date_format(self) -> Optional[str]:
|
77
|
+
"""
|
78
|
+
Format of the datetime; required if expires_in is returned as the expiration datetime instead of the number of seconds until it expires
|
79
|
+
"""
|
80
|
+
|
81
|
+
return None
|
82
|
+
|
55
83
|
def get_auth_header(self) -> Mapping[str, Any]:
|
56
84
|
"""HTTP header to set on the requests"""
|
57
85
|
token = self.access_token if self._is_access_token_flow else self.get_access_token()
|
58
86
|
return {"Authorization": f"Bearer {token}"}
|
59
87
|
|
60
|
-
@property
|
61
|
-
def _is_access_token_flow(self) -> bool:
|
62
|
-
return self.get_token_refresh_endpoint() is None and self.access_token is not None
|
63
|
-
|
64
88
|
def get_access_token(self) -> str:
|
65
89
|
"""Returns the access token"""
|
66
90
|
if self.token_has_expired():
|
@@ -72,7 +96,7 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
72
96
|
|
73
97
|
def token_has_expired(self) -> bool:
|
74
98
|
"""Returns True if the token is expired"""
|
75
|
-
return
|
99
|
+
return ab_datetime_now() > self.get_token_expiry_date()
|
76
100
|
|
77
101
|
def build_refresh_request_body(self) -> Mapping[str, Any]:
|
78
102
|
"""
|
@@ -106,9 +130,39 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
106
130
|
headers = self.get_refresh_request_headers()
|
107
131
|
return headers if headers else None
|
108
132
|
|
133
|
+
def refresh_access_token(self) -> Tuple[str, Union[str, int]]:
|
134
|
+
"""
|
135
|
+
Returns the refreshed access token and its expiry value
|
136
|
+
|
137
|
+
:return: a tuple of (access_token, token_lifespan)
|
138
|
+
"""
|
139
|
+
response_json = self._make_handled_request()
|
140
|
+
self._ensure_access_token_in_response(response_json)
|
141
|
+
|
142
|
+
return (
|
143
|
+
self._extract_access_token(response_json),
|
144
|
+
self._extract_token_expiry_date(response_json),
|
145
|
+
)
|
146
|
+
|
147
|
+
# ----------------
|
148
|
+
# PRIVATE METHODS
|
149
|
+
# ----------------
|
150
|
+
|
109
151
|
def _wrap_refresh_token_exception(
|
110
152
|
self, exception: requests.exceptions.RequestException
|
111
153
|
) -> bool:
|
154
|
+
"""
|
155
|
+
Wraps and handles exceptions that occur during the refresh token process.
|
156
|
+
|
157
|
+
This method checks if the provided exception is related to a refresh token error
|
158
|
+
by examining the response status code and specific error content.
|
159
|
+
|
160
|
+
Args:
|
161
|
+
exception (requests.exceptions.RequestException): The exception raised during the request.
|
162
|
+
|
163
|
+
Returns:
|
164
|
+
bool: True if the exception is related to a refresh token error, False otherwise.
|
165
|
+
"""
|
112
166
|
try:
|
113
167
|
if exception.response is not None:
|
114
168
|
exception_content = exception.response.json()
|
@@ -130,7 +184,24 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
130
184
|
),
|
131
185
|
max_time=300,
|
132
186
|
)
|
133
|
-
def
|
187
|
+
def _make_handled_request(self) -> Any:
|
188
|
+
"""
|
189
|
+
Makes a handled HTTP request to refresh an OAuth token.
|
190
|
+
|
191
|
+
This method sends a POST request to the token refresh endpoint with the necessary
|
192
|
+
headers and body to obtain a new access token. It handles various exceptions that
|
193
|
+
may occur during the request and logs the response for troubleshooting purposes.
|
194
|
+
|
195
|
+
Returns:
|
196
|
+
Mapping[str, Any]: The JSON response from the token refresh endpoint.
|
197
|
+
|
198
|
+
Raises:
|
199
|
+
DefaultBackoffException: If the response status code is 429 (Too Many Requests)
|
200
|
+
or any 5xx server error.
|
201
|
+
AirbyteTracedException: If the refresh token is invalid or expired, prompting
|
202
|
+
re-authentication.
|
203
|
+
Exception: For any other exceptions that occur during the request.
|
204
|
+
"""
|
134
205
|
try:
|
135
206
|
response = requests.request(
|
136
207
|
method="POST",
|
@@ -138,22 +209,10 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
138
209
|
data=self.build_refresh_request_body(),
|
139
210
|
headers=self.build_refresh_request_headers(),
|
140
211
|
)
|
141
|
-
if
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
access_key = response_json.get(self.get_access_token_name())
|
146
|
-
if not access_key:
|
147
|
-
raise Exception(
|
148
|
-
"Token refresh API response was missing access token {self.get_access_token_name()}"
|
149
|
-
)
|
150
|
-
add_to_secrets(access_key)
|
151
|
-
self._log_response(response)
|
152
|
-
return response_json
|
153
|
-
else:
|
154
|
-
# log the response even if the request failed for troubleshooting purposes
|
155
|
-
self._log_response(response)
|
156
|
-
response.raise_for_status()
|
212
|
+
# log the response even if the request failed for troubleshooting purposes
|
213
|
+
self._log_response(response)
|
214
|
+
response.raise_for_status()
|
215
|
+
return response.json()
|
157
216
|
except requests.exceptions.RequestException as e:
|
158
217
|
if e.response is not None:
|
159
218
|
if e.response.status_code == 429 or e.response.status_code >= 500:
|
@@ -167,19 +226,36 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
167
226
|
except Exception as e:
|
168
227
|
raise Exception(f"Error while refreshing access token: {e}") from e
|
169
228
|
|
170
|
-
def
|
229
|
+
def _ensure_access_token_in_response(self, response_data: Mapping[str, Any]) -> None:
|
171
230
|
"""
|
172
|
-
|
231
|
+
Ensures that the access token is present in the response data.
|
173
232
|
|
174
|
-
|
175
|
-
|
176
|
-
|
233
|
+
This method attempts to extract the access token from the provided response data.
|
234
|
+
If the access token is not found, it raises an exception indicating that the token
|
235
|
+
refresh API response was missing the access token. If the access token is found,
|
236
|
+
it adds the token to the list of secrets to ensure it is replaced before logging
|
237
|
+
the response.
|
177
238
|
|
178
|
-
|
179
|
-
|
180
|
-
|
239
|
+
Args:
|
240
|
+
response_data (Mapping[str, Any]): The response data from which to extract the access token.
|
241
|
+
|
242
|
+
Raises:
|
243
|
+
Exception: If the access token is not found in the response data.
|
244
|
+
ResponseKeysMaxRecurtionReached: If the maximum recursion depth is reached while extracting the access token.
|
245
|
+
"""
|
246
|
+
try:
|
247
|
+
access_key = self._extract_access_token(response_data)
|
248
|
+
if not access_key:
|
249
|
+
raise Exception(
|
250
|
+
"Token refresh API response was missing access token {self.get_access_token_name()}"
|
251
|
+
)
|
252
|
+
# Add the access token to the list of secrets so it is replaced before logging the response
|
253
|
+
# An argument could be made to remove the previous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
|
254
|
+
add_to_secrets(access_key)
|
255
|
+
except ResponseKeysMaxRecurtionReached as e:
|
256
|
+
raise e
|
181
257
|
|
182
|
-
def _parse_token_expiration_date(self, value: Union[str, int]) ->
|
258
|
+
def _parse_token_expiration_date(self, value: Union[str, int]) -> AirbyteDateTime:
|
183
259
|
"""
|
184
260
|
Return the expiration datetime of the refresh token
|
185
261
|
|
@@ -191,26 +267,139 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
191
267
|
raise ValueError(
|
192
268
|
f"Invalid token expiry date format {self.token_expiry_date_format}; a string representing the format is required."
|
193
269
|
)
|
194
|
-
|
270
|
+
try:
|
271
|
+
return ab_datetime_parse(str(value))
|
272
|
+
except ValueError as e:
|
273
|
+
raise ValueError(f"Invalid token expiry date format: {e}")
|
195
274
|
else:
|
196
|
-
|
275
|
+
try:
|
276
|
+
# Only accept numeric values (as int/float/string) when no format specified
|
277
|
+
seconds = int(float(str(value)))
|
278
|
+
return ab_datetime_now() + timedelta(seconds=seconds)
|
279
|
+
except (ValueError, TypeError):
|
280
|
+
raise ValueError(
|
281
|
+
f"Invalid expires_in value: {value}. Expected number of seconds when no format specified."
|
282
|
+
)
|
197
283
|
|
198
|
-
|
199
|
-
def token_expiry_is_time_of_expiration(self) -> bool:
|
284
|
+
def _extract_access_token(self, response_data: Mapping[str, Any]) -> Any:
|
200
285
|
"""
|
201
|
-
|
286
|
+
Extracts the access token from the given response data.
|
287
|
+
|
288
|
+
Args:
|
289
|
+
response_data (Mapping[str, Any]): The response data from which to extract the access token.
|
290
|
+
|
291
|
+
Returns:
|
292
|
+
str: The extracted access token.
|
202
293
|
"""
|
294
|
+
return self._find_and_get_value_from_response(response_data, self.get_access_token_name())
|
203
295
|
|
204
|
-
|
296
|
+
def _extract_refresh_token(self, response_data: Mapping[str, Any]) -> Any:
|
297
|
+
"""
|
298
|
+
Extracts the refresh token from the given response data.
|
205
299
|
|
206
|
-
|
207
|
-
|
300
|
+
Args:
|
301
|
+
response_data (Mapping[str, Any]): The response data from which to extract the refresh token.
|
302
|
+
|
303
|
+
Returns:
|
304
|
+
str: The extracted refresh token.
|
208
305
|
"""
|
209
|
-
|
306
|
+
return self._find_and_get_value_from_response(response_data, self.get_refresh_token_name())
|
307
|
+
|
308
|
+
def _extract_token_expiry_date(self, response_data: Mapping[str, Any]) -> Any:
|
309
|
+
"""
|
310
|
+
Extracts the token_expiry_date, like `expires_in` or `expires_at`, etc from the given response data.
|
311
|
+
|
312
|
+
Args:
|
313
|
+
response_data (Mapping[str, Any]): The response data from which to extract the token_expiry_date.
|
314
|
+
|
315
|
+
Returns:
|
316
|
+
str: The extracted token_expiry_date.
|
210
317
|
"""
|
318
|
+
return self._find_and_get_value_from_response(response_data, self.get_expires_in_name())
|
319
|
+
|
320
|
+
def _find_and_get_value_from_response(
|
321
|
+
self,
|
322
|
+
response_data: Mapping[str, Any],
|
323
|
+
key_name: str,
|
324
|
+
max_depth: int = 5,
|
325
|
+
current_depth: int = 0,
|
326
|
+
) -> Any:
|
327
|
+
"""
|
328
|
+
Recursively searches for a specified key in a nested dictionary or list and returns its value if found.
|
329
|
+
|
330
|
+
Args:
|
331
|
+
response_data (Mapping[str, Any]): The response data to search through, which can be a dictionary or a list.
|
332
|
+
key_name (str): The key to search for in the response data.
|
333
|
+
max_depth (int, optional): The maximum depth to search for the key to avoid infinite recursion. Defaults to 5.
|
334
|
+
current_depth (int, optional): The current depth of the recursion. Defaults to 0.
|
335
|
+
|
336
|
+
Returns:
|
337
|
+
Any: The value associated with the specified key if found, otherwise None.
|
338
|
+
|
339
|
+
Raises:
|
340
|
+
AirbyteTracedException: If the maximum recursion depth is reached without finding the key.
|
341
|
+
"""
|
342
|
+
if current_depth > max_depth:
|
343
|
+
# this is needed to avoid unbounded recursion, which is possible when very deep nesting is observed.
|
344
|
+
message = f"The maximum level of recursion is reached. Couldn't find the speficied `{key_name}` in the response."
|
345
|
+
raise ResponseKeysMaxRecurtionReached(
|
346
|
+
internal_message=message, message=message, failure_type=FailureType.config_error
|
347
|
+
)
|
348
|
+
|
349
|
+
if isinstance(response_data, dict):
|
350
|
+
# get from the root level
|
351
|
+
if key_name in response_data:
|
352
|
+
return response_data[key_name]
|
353
|
+
|
354
|
+
# get from the nested object
|
355
|
+
for _, value in response_data.items():
|
356
|
+
result = self._find_and_get_value_from_response(
|
357
|
+
value, key_name, max_depth, current_depth + 1
|
358
|
+
)
|
359
|
+
if result is not None:
|
360
|
+
return result
|
361
|
+
|
362
|
+
# get from the nested array object
|
363
|
+
elif isinstance(response_data, list):
|
364
|
+
for item in response_data:
|
365
|
+
result = self._find_and_get_value_from_response(
|
366
|
+
item, key_name, max_depth, current_depth + 1
|
367
|
+
)
|
368
|
+
if result is not None:
|
369
|
+
return result
|
211
370
|
|
212
371
|
return None
|
213
372
|
|
373
|
+
@property
|
374
|
+
def _message_repository(self) -> Optional[MessageRepository]:
|
375
|
+
"""
|
376
|
+
The implementation can define a message_repository if it wants debugging logs for HTTP requests
|
377
|
+
"""
|
378
|
+
return _NOOP_MESSAGE_REPOSITORY
|
379
|
+
|
380
|
+
def _log_response(self, response: requests.Response) -> None:
|
381
|
+
"""
|
382
|
+
Logs the HTTP response using the message repository if it is available.
|
383
|
+
|
384
|
+
Args:
|
385
|
+
response (requests.Response): The HTTP response to log.
|
386
|
+
"""
|
387
|
+
if self._message_repository:
|
388
|
+
self._message_repository.log_message(
|
389
|
+
Level.DEBUG,
|
390
|
+
lambda: format_http_message(
|
391
|
+
response,
|
392
|
+
"Refresh token",
|
393
|
+
"Obtains access token",
|
394
|
+
self._NO_STREAM_NAME,
|
395
|
+
is_auxiliary=True,
|
396
|
+
),
|
397
|
+
)
|
398
|
+
|
399
|
+
# ----------------
|
400
|
+
# ABSTRACT METHODS
|
401
|
+
# ----------------
|
402
|
+
|
214
403
|
@abstractmethod
|
215
404
|
def get_token_refresh_endpoint(self) -> Optional[str]:
|
216
405
|
"""Returns the endpoint to refresh the access token"""
|
@@ -244,7 +433,7 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
244
433
|
"""List of requested scopes"""
|
245
434
|
|
246
435
|
@abstractmethod
|
247
|
-
def get_token_expiry_date(self) ->
|
436
|
+
def get_token_expiry_date(self) -> AirbyteDateTime:
|
248
437
|
"""Expiration date of the access token"""
|
249
438
|
|
250
439
|
@abstractmethod
|
@@ -284,23 +473,3 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
284
473
|
@abstractmethod
|
285
474
|
def access_token(self, value: str) -> str:
|
286
475
|
"""Setter for the access token"""
|
287
|
-
|
288
|
-
@property
|
289
|
-
def _message_repository(self) -> Optional[MessageRepository]:
|
290
|
-
"""
|
291
|
-
The implementation can define a message_repository if it wants debugging logs for HTTP requests
|
292
|
-
"""
|
293
|
-
return _NOOP_MESSAGE_REPOSITORY
|
294
|
-
|
295
|
-
def _log_response(self, response: requests.Response) -> None:
|
296
|
-
if self._message_repository:
|
297
|
-
self._message_repository.log_message(
|
298
|
-
Level.DEBUG,
|
299
|
-
lambda: format_http_message(
|
300
|
-
response,
|
301
|
-
"Refresh token",
|
302
|
-
"Obtains access token",
|
303
|
-
self._NO_STREAM_NAME,
|
304
|
-
is_auxiliary=True,
|
305
|
-
),
|
306
|
-
)
|