airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
  16. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  17. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  18. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  19. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
  20. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  21. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
  22. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  23. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  24. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
  25. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  26. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  27. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
  29. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  30. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  31. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
  33. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  34. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  35. airbyte_cdk/sources/http_logger.py +1 -1
  36. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  37. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  38. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  39. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  40. airbyte_cdk/sources/streams/core.py +6 -6
  41. airbyte_cdk/sources/streams/http/http.py +1 -2
  42. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  43. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  44. airbyte_cdk/sources/types.py +4 -2
  45. airbyte_cdk/sources/utils/transform.py +23 -2
  46. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  47. airbyte_cdk/utils/datetime_helpers.py +499 -0
  48. airbyte_cdk/utils/mapping_helpers.py +27 -86
  49. airbyte_cdk/utils/slice_hasher.py +8 -1
  50. airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
  51. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
  52. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
  53. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
  54. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
  55. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -4,11 +4,11 @@
4
4
 
5
5
  import logging
6
6
  from abc import abstractmethod
7
+ from datetime import timedelta
7
8
  from json import JSONDecodeError
8
9
  from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
9
10
 
10
11
  import backoff
11
- import pendulum
12
12
  import requests
13
13
  from requests.auth import AuthBase
14
14
 
@@ -17,6 +17,7 @@ from airbyte_cdk.sources.http_logger import format_http_message
17
17
  from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
18
18
  from airbyte_cdk.utils import AirbyteTracedException
19
19
  from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
20
+ from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
20
21
 
21
22
  from ..exceptions import DefaultBackoffException
22
23
 
@@ -24,6 +25,13 @@ logger = logging.getLogger("airbyte")
24
25
  _NOOP_MESSAGE_REPOSITORY = NoopMessageRepository()
25
26
 
26
27
 
28
+ class ResponseKeysMaxRecurtionReached(AirbyteTracedException):
29
+ """
30
+ Raised when the max level of recursion is reached, when trying to
31
+ find-and-get the target key, during the `_make_handled_request`
32
+ """
33
+
34
+
27
35
  class AbstractOauth2Authenticator(AuthBase):
28
36
  """
29
37
  Abstract class for an OAuth authenticators that implements the OAuth token refresh flow. The authenticator
@@ -52,15 +60,31 @@ class AbstractOauth2Authenticator(AuthBase):
52
60
  request.headers.update(self.get_auth_header())
53
61
  return request
54
62
 
63
+ @property
64
+ def _is_access_token_flow(self) -> bool:
65
+ return self.get_token_refresh_endpoint() is None and self.access_token is not None
66
+
67
+ @property
68
+ def token_expiry_is_time_of_expiration(self) -> bool:
69
+ """
70
+ Indicates that the Token Expiry returns the date until which the token will be valid, not the amount of time it will be valid.
71
+ """
72
+
73
+ return False
74
+
75
+ @property
76
+ def token_expiry_date_format(self) -> Optional[str]:
77
+ """
78
+ Format of the datetime; exists it if expires_in is returned as the expiration datetime instead of seconds until it expires
79
+ """
80
+
81
+ return None
82
+
55
83
  def get_auth_header(self) -> Mapping[str, Any]:
56
84
  """HTTP header to set on the requests"""
57
85
  token = self.access_token if self._is_access_token_flow else self.get_access_token()
58
86
  return {"Authorization": f"Bearer {token}"}
59
87
 
60
- @property
61
- def _is_access_token_flow(self) -> bool:
62
- return self.get_token_refresh_endpoint() is None and self.access_token is not None
63
-
64
88
  def get_access_token(self) -> str:
65
89
  """Returns the access token"""
66
90
  if self.token_has_expired():
@@ -72,7 +96,7 @@ class AbstractOauth2Authenticator(AuthBase):
72
96
 
73
97
  def token_has_expired(self) -> bool:
74
98
  """Returns True if the token is expired"""
75
- return pendulum.now() > self.get_token_expiry_date() # type: ignore # this is always a bool despite what mypy thinks
99
+ return ab_datetime_now() > self.get_token_expiry_date()
76
100
 
77
101
  def build_refresh_request_body(self) -> Mapping[str, Any]:
78
102
  """
@@ -106,9 +130,39 @@ class AbstractOauth2Authenticator(AuthBase):
106
130
  headers = self.get_refresh_request_headers()
107
131
  return headers if headers else None
108
132
 
133
+ def refresh_access_token(self) -> Tuple[str, Union[str, int]]:
134
+ """
135
+ Returns the refresh token and its expiration datetime
136
+
137
+ :return: a tuple of (access_token, token_lifespan)
138
+ """
139
+ response_json = self._make_handled_request()
140
+ self._ensure_access_token_in_response(response_json)
141
+
142
+ return (
143
+ self._extract_access_token(response_json),
144
+ self._extract_token_expiry_date(response_json),
145
+ )
146
+
147
+ # ----------------
148
+ # PRIVATE METHODS
149
+ # ----------------
150
+
109
151
  def _wrap_refresh_token_exception(
110
152
  self, exception: requests.exceptions.RequestException
111
153
  ) -> bool:
154
+ """
155
+ Wraps and handles exceptions that occur during the refresh token process.
156
+
157
+ This method checks if the provided exception is related to a refresh token error
158
+ by examining the response status code and specific error content.
159
+
160
+ Args:
161
+ exception (requests.exceptions.RequestException): The exception raised during the request.
162
+
163
+ Returns:
164
+ bool: True if the exception is related to a refresh token error, False otherwise.
165
+ """
112
166
  try:
113
167
  if exception.response is not None:
114
168
  exception_content = exception.response.json()
@@ -130,7 +184,24 @@ class AbstractOauth2Authenticator(AuthBase):
130
184
  ),
131
185
  max_time=300,
132
186
  )
133
- def _get_refresh_access_token_response(self) -> Any:
187
+ def _make_handled_request(self) -> Any:
188
+ """
189
+ Makes a handled HTTP request to refresh an OAuth token.
190
+
191
+ This method sends a POST request to the token refresh endpoint with the necessary
192
+ headers and body to obtain a new access token. It handles various exceptions that
193
+ may occur during the request and logs the response for troubleshooting purposes.
194
+
195
+ Returns:
196
+ Mapping[str, Any]: The JSON response from the token refresh endpoint.
197
+
198
+ Raises:
199
+ DefaultBackoffException: If the response status code is 429 (Too Many Requests)
200
+ or any 5xx server error.
201
+ AirbyteTracedException: If the refresh token is invalid or expired, prompting
202
+ re-authentication.
203
+ Exception: For any other exceptions that occur during the request.
204
+ """
134
205
  try:
135
206
  response = requests.request(
136
207
  method="POST",
@@ -138,22 +209,10 @@ class AbstractOauth2Authenticator(AuthBase):
138
209
  data=self.build_refresh_request_body(),
139
210
  headers=self.build_refresh_request_headers(),
140
211
  )
141
- if response.ok:
142
- response_json = response.json()
143
- # Add the access token to the list of secrets so it is replaced before logging the response
144
- # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
145
- access_key = response_json.get(self.get_access_token_name())
146
- if not access_key:
147
- raise Exception(
148
- "Token refresh API response was missing access token {self.get_access_token_name()}"
149
- )
150
- add_to_secrets(access_key)
151
- self._log_response(response)
152
- return response_json
153
- else:
154
- # log the response even if the request failed for troubleshooting purposes
155
- self._log_response(response)
156
- response.raise_for_status()
212
+ # log the response even if the request failed for troubleshooting purposes
213
+ self._log_response(response)
214
+ response.raise_for_status()
215
+ return response.json()
157
216
  except requests.exceptions.RequestException as e:
158
217
  if e.response is not None:
159
218
  if e.response.status_code == 429 or e.response.status_code >= 500:
@@ -167,19 +226,36 @@ class AbstractOauth2Authenticator(AuthBase):
167
226
  except Exception as e:
168
227
  raise Exception(f"Error while refreshing access token: {e}") from e
169
228
 
170
- def refresh_access_token(self) -> Tuple[str, Union[str, int]]:
229
+ def _ensure_access_token_in_response(self, response_data: Mapping[str, Any]) -> None:
171
230
  """
172
- Returns the refresh token and its expiration datetime
231
+ Ensures that the access token is present in the response data.
173
232
 
174
- :return: a tuple of (access_token, token_lifespan)
175
- """
176
- response_json = self._get_refresh_access_token_response()
233
+ This method attempts to extract the access token from the provided response data.
234
+ If the access token is not found, it raises an exception indicating that the token
235
+ refresh API response was missing the access token. If the access token is found,
236
+ it adds the token to the list of secrets to ensure it is replaced before logging
237
+ the response.
177
238
 
178
- return response_json[self.get_access_token_name()], response_json[
179
- self.get_expires_in_name()
180
- ]
239
+ Args:
240
+ response_data (Mapping[str, Any]): The response data from which to extract the access token.
241
+
242
+ Raises:
243
+ Exception: If the access token is not found in the response data.
244
+ ResponseKeysMaxRecurtionReached: If the maximum recursion depth is reached while extracting the access token.
245
+ """
246
+ try:
247
+ access_key = self._extract_access_token(response_data)
248
+ if not access_key:
249
+ raise Exception(
250
+ "Token refresh API response was missing access token {self.get_access_token_name()}"
251
+ )
252
+ # Add the access token to the list of secrets so it is replaced before logging the response
253
+ # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
254
+ add_to_secrets(access_key)
255
+ except ResponseKeysMaxRecurtionReached as e:
256
+ raise e
181
257
 
182
- def _parse_token_expiration_date(self, value: Union[str, int]) -> pendulum.DateTime:
258
+ def _parse_token_expiration_date(self, value: Union[str, int]) -> AirbyteDateTime:
183
259
  """
184
260
  Return the expiration datetime of the refresh token
185
261
 
@@ -191,26 +267,139 @@ class AbstractOauth2Authenticator(AuthBase):
191
267
  raise ValueError(
192
268
  f"Invalid token expiry date format {self.token_expiry_date_format}; a string representing the format is required."
193
269
  )
194
- return pendulum.from_format(str(value), self.token_expiry_date_format)
270
+ try:
271
+ return ab_datetime_parse(str(value))
272
+ except ValueError as e:
273
+ raise ValueError(f"Invalid token expiry date format: {e}")
195
274
  else:
196
- return pendulum.now().add(seconds=int(float(value)))
275
+ try:
276
+ # Only accept numeric values (as int/float/string) when no format specified
277
+ seconds = int(float(str(value)))
278
+ return ab_datetime_now() + timedelta(seconds=seconds)
279
+ except (ValueError, TypeError):
280
+ raise ValueError(
281
+ f"Invalid expires_in value: {value}. Expected number of seconds when no format specified."
282
+ )
197
283
 
198
- @property
199
- def token_expiry_is_time_of_expiration(self) -> bool:
284
+ def _extract_access_token(self, response_data: Mapping[str, Any]) -> Any:
200
285
  """
201
- Indicates that the Token Expiry returns the date until which the token will be valid, not the amount of time it will be valid.
286
+ Extracts the access token from the given response data.
287
+
288
+ Args:
289
+ response_data (Mapping[str, Any]): The response data from which to extract the access token.
290
+
291
+ Returns:
292
+ str: The extracted access token.
202
293
  """
294
+ return self._find_and_get_value_from_response(response_data, self.get_access_token_name())
203
295
 
204
- return False
296
+ def _extract_refresh_token(self, response_data: Mapping[str, Any]) -> Any:
297
+ """
298
+ Extracts the refresh token from the given response data.
205
299
 
206
- @property
207
- def token_expiry_date_format(self) -> Optional[str]:
300
+ Args:
301
+ response_data (Mapping[str, Any]): The response data from which to extract the refresh token.
302
+
303
+ Returns:
304
+ str: The extracted refresh token.
208
305
  """
209
- Format of the datetime; exists it if expires_in is returned as the expiration datetime instead of seconds until it expires
306
+ return self._find_and_get_value_from_response(response_data, self.get_refresh_token_name())
307
+
308
+ def _extract_token_expiry_date(self, response_data: Mapping[str, Any]) -> Any:
309
+ """
310
+ Extracts the token_expiry_date, like `expires_in` or `expires_at`, etc from the given response data.
311
+
312
+ Args:
313
+ response_data (Mapping[str, Any]): The response data from which to extract the token_expiry_date.
314
+
315
+ Returns:
316
+ str: The extracted token_expiry_date.
210
317
  """
318
+ return self._find_and_get_value_from_response(response_data, self.get_expires_in_name())
319
+
320
+ def _find_and_get_value_from_response(
321
+ self,
322
+ response_data: Mapping[str, Any],
323
+ key_name: str,
324
+ max_depth: int = 5,
325
+ current_depth: int = 0,
326
+ ) -> Any:
327
+ """
328
+ Recursively searches for a specified key in a nested dictionary or list and returns its value if found.
329
+
330
+ Args:
331
+ response_data (Mapping[str, Any]): The response data to search through, which can be a dictionary or a list.
332
+ key_name (str): The key to search for in the response data.
333
+ max_depth (int, optional): The maximum depth to search for the key to avoid infinite recursion. Defaults to 5.
334
+ current_depth (int, optional): The current depth of the recursion. Defaults to 0.
335
+
336
+ Returns:
337
+ Any: The value associated with the specified key if found, otherwise None.
338
+
339
+ Raises:
340
+ AirbyteTracedException: If the maximum recursion depth is reached without finding the key.
341
+ """
342
+ if current_depth > max_depth:
343
+ # this is needed to avoid an inf loop, possible with a very deep nesting observed.
344
+ message = f"The maximum level of recursion is reached. Couldn't find the speficied `{key_name}` in the response."
345
+ raise ResponseKeysMaxRecurtionReached(
346
+ internal_message=message, message=message, failure_type=FailureType.config_error
347
+ )
348
+
349
+ if isinstance(response_data, dict):
350
+ # get from the root level
351
+ if key_name in response_data:
352
+ return response_data[key_name]
353
+
354
+ # get from the nested object
355
+ for _, value in response_data.items():
356
+ result = self._find_and_get_value_from_response(
357
+ value, key_name, max_depth, current_depth + 1
358
+ )
359
+ if result is not None:
360
+ return result
361
+
362
+ # get from the nested array object
363
+ elif isinstance(response_data, list):
364
+ for item in response_data:
365
+ result = self._find_and_get_value_from_response(
366
+ item, key_name, max_depth, current_depth + 1
367
+ )
368
+ if result is not None:
369
+ return result
211
370
 
212
371
  return None
213
372
 
373
+ @property
374
+ def _message_repository(self) -> Optional[MessageRepository]:
375
+ """
376
+ The implementation can define a message_repository if it wants debugging logs for HTTP requests
377
+ """
378
+ return _NOOP_MESSAGE_REPOSITORY
379
+
380
+ def _log_response(self, response: requests.Response) -> None:
381
+ """
382
+ Logs the HTTP response using the message repository if it is available.
383
+
384
+ Args:
385
+ response (requests.Response): The HTTP response to log.
386
+ """
387
+ if self._message_repository:
388
+ self._message_repository.log_message(
389
+ Level.DEBUG,
390
+ lambda: format_http_message(
391
+ response,
392
+ "Refresh token",
393
+ "Obtains access token",
394
+ self._NO_STREAM_NAME,
395
+ is_auxiliary=True,
396
+ ),
397
+ )
398
+
399
+ # ----------------
400
+ # ABSTR METHODS
401
+ # ----------------
402
+
214
403
  @abstractmethod
215
404
  def get_token_refresh_endpoint(self) -> Optional[str]:
216
405
  """Returns the endpoint to refresh the access token"""
@@ -244,7 +433,7 @@ class AbstractOauth2Authenticator(AuthBase):
244
433
  """List of requested scopes"""
245
434
 
246
435
  @abstractmethod
247
- def get_token_expiry_date(self) -> pendulum.DateTime:
436
+ def get_token_expiry_date(self) -> AirbyteDateTime:
248
437
  """Expiration date of the access token"""
249
438
 
250
439
  @abstractmethod
@@ -284,23 +473,3 @@ class AbstractOauth2Authenticator(AuthBase):
284
473
  @abstractmethod
285
474
  def access_token(self, value: str) -> str:
286
475
  """Setter for the access token"""
287
-
288
- @property
289
- def _message_repository(self) -> Optional[MessageRepository]:
290
- """
291
- The implementation can define a message_repository if it wants debugging logs for HTTP requests
292
- """
293
- return _NOOP_MESSAGE_REPOSITORY
294
-
295
- def _log_response(self, response: requests.Response) -> None:
296
- if self._message_repository:
297
- self._message_repository.log_message(
298
- Level.DEBUG,
299
- lambda: format_http_message(
300
- response,
301
- "Refresh token",
302
- "Obtains access token",
303
- self._NO_STREAM_NAME,
304
- is_auxiliary=True,
305
- ),
306
- )