airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
  11. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  14. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  16. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  22. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
  24. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  25. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  26. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  27. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  28. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  29. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  30. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
  31. airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
  32. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
  33. airbyte_cdk/sources/http_logger.py +1 -1
  34. airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
  38. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
  39. airbyte_cdk/sources/types.py +2 -4
  40. airbyte_cdk/sources/utils/transform.py +2 -23
  41. airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
  42. airbyte_cdk/utils/mapping_helpers.py +86 -27
  43. airbyte_cdk/utils/slice_hasher.py +1 -8
  44. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
  45. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
  46. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
  47. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
  48. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
  49. airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
  50. airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
  51. airbyte_cdk/utils/datetime_helpers.py +0 -499
  52. airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
  53. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
  54. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -4,11 +4,11 @@
4
4
 
5
5
  import logging
6
6
  from abc import abstractmethod
7
- from datetime import timedelta
8
7
  from json import JSONDecodeError
9
8
  from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
10
9
 
11
10
  import backoff
11
+ import pendulum
12
12
  import requests
13
13
  from requests.auth import AuthBase
14
14
 
@@ -17,7 +17,6 @@ from airbyte_cdk.sources.http_logger import format_http_message
17
17
  from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
18
18
  from airbyte_cdk.utils import AirbyteTracedException
19
19
  from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
20
- from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
21
20
 
22
21
  from ..exceptions import DefaultBackoffException
23
22
 
@@ -25,13 +24,6 @@ logger = logging.getLogger("airbyte")
25
24
  _NOOP_MESSAGE_REPOSITORY = NoopMessageRepository()
26
25
 
27
26
 
28
- class ResponseKeysMaxRecurtionReached(AirbyteTracedException):
29
- """
30
- Raised when the max level of recursion is reached, when trying to
31
- find-and-get the target key, during the `_make_handled_request`
32
- """
33
-
34
-
35
27
  class AbstractOauth2Authenticator(AuthBase):
36
28
  """
37
29
  Abstract class for an OAuth authenticators that implements the OAuth token refresh flow. The authenticator
@@ -60,31 +52,15 @@ class AbstractOauth2Authenticator(AuthBase):
60
52
  request.headers.update(self.get_auth_header())
61
53
  return request
62
54
 
63
- @property
64
- def _is_access_token_flow(self) -> bool:
65
- return self.get_token_refresh_endpoint() is None and self.access_token is not None
66
-
67
- @property
68
- def token_expiry_is_time_of_expiration(self) -> bool:
69
- """
70
- Indicates that the Token Expiry returns the date until which the token will be valid, not the amount of time it will be valid.
71
- """
72
-
73
- return False
74
-
75
- @property
76
- def token_expiry_date_format(self) -> Optional[str]:
77
- """
78
- Format of the datetime; exists it if expires_in is returned as the expiration datetime instead of seconds until it expires
79
- """
80
-
81
- return None
82
-
83
55
  def get_auth_header(self) -> Mapping[str, Any]:
84
56
  """HTTP header to set on the requests"""
85
57
  token = self.access_token if self._is_access_token_flow else self.get_access_token()
86
58
  return {"Authorization": f"Bearer {token}"}
87
59
 
60
+ @property
61
+ def _is_access_token_flow(self) -> bool:
62
+ return self.get_token_refresh_endpoint() is None and self.access_token is not None
63
+
88
64
  def get_access_token(self) -> str:
89
65
  """Returns the access token"""
90
66
  if self.token_has_expired():
@@ -96,7 +72,7 @@ class AbstractOauth2Authenticator(AuthBase):
96
72
 
97
73
  def token_has_expired(self) -> bool:
98
74
  """Returns True if the token is expired"""
99
- return ab_datetime_now() > self.get_token_expiry_date()
75
+ return pendulum.now() > self.get_token_expiry_date() # type: ignore # this is always a bool despite what mypy thinks
100
76
 
101
77
  def build_refresh_request_body(self) -> Mapping[str, Any]:
102
78
  """
@@ -130,39 +106,9 @@ class AbstractOauth2Authenticator(AuthBase):
130
106
  headers = self.get_refresh_request_headers()
131
107
  return headers if headers else None
132
108
 
133
- def refresh_access_token(self) -> Tuple[str, Union[str, int]]:
134
- """
135
- Returns the refresh token and its expiration datetime
136
-
137
- :return: a tuple of (access_token, token_lifespan)
138
- """
139
- response_json = self._make_handled_request()
140
- self._ensure_access_token_in_response(response_json)
141
-
142
- return (
143
- self._extract_access_token(response_json),
144
- self._extract_token_expiry_date(response_json),
145
- )
146
-
147
- # ----------------
148
- # PRIVATE METHODS
149
- # ----------------
150
-
151
109
  def _wrap_refresh_token_exception(
152
110
  self, exception: requests.exceptions.RequestException
153
111
  ) -> bool:
154
- """
155
- Wraps and handles exceptions that occur during the refresh token process.
156
-
157
- This method checks if the provided exception is related to a refresh token error
158
- by examining the response status code and specific error content.
159
-
160
- Args:
161
- exception (requests.exceptions.RequestException): The exception raised during the request.
162
-
163
- Returns:
164
- bool: True if the exception is related to a refresh token error, False otherwise.
165
- """
166
112
  try:
167
113
  if exception.response is not None:
168
114
  exception_content = exception.response.json()
@@ -184,24 +130,7 @@ class AbstractOauth2Authenticator(AuthBase):
184
130
  ),
185
131
  max_time=300,
186
132
  )
187
- def _make_handled_request(self) -> Any:
188
- """
189
- Makes a handled HTTP request to refresh an OAuth token.
190
-
191
- This method sends a POST request to the token refresh endpoint with the necessary
192
- headers and body to obtain a new access token. It handles various exceptions that
193
- may occur during the request and logs the response for troubleshooting purposes.
194
-
195
- Returns:
196
- Mapping[str, Any]: The JSON response from the token refresh endpoint.
197
-
198
- Raises:
199
- DefaultBackoffException: If the response status code is 429 (Too Many Requests)
200
- or any 5xx server error.
201
- AirbyteTracedException: If the refresh token is invalid or expired, prompting
202
- re-authentication.
203
- Exception: For any other exceptions that occur during the request.
204
- """
133
+ def _get_refresh_access_token_response(self) -> Any:
205
134
  try:
206
135
  response = requests.request(
207
136
  method="POST",
@@ -209,10 +138,22 @@ class AbstractOauth2Authenticator(AuthBase):
209
138
  data=self.build_refresh_request_body(),
210
139
  headers=self.build_refresh_request_headers(),
211
140
  )
212
- # log the response even if the request failed for troubleshooting purposes
213
- self._log_response(response)
214
- response.raise_for_status()
215
- return response.json()
141
+ if response.ok:
142
+ response_json = response.json()
143
+ # Add the access token to the list of secrets so it is replaced before logging the response
144
+ # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
145
+ access_key = response_json.get(self.get_access_token_name())
146
+ if not access_key:
147
+ raise Exception(
148
+ "Token refresh API response was missing access token {self.get_access_token_name()}"
149
+ )
150
+ add_to_secrets(access_key)
151
+ self._log_response(response)
152
+ return response_json
153
+ else:
154
+ # log the response even if the request failed for troubleshooting purposes
155
+ self._log_response(response)
156
+ response.raise_for_status()
216
157
  except requests.exceptions.RequestException as e:
217
158
  if e.response is not None:
218
159
  if e.response.status_code == 429 or e.response.status_code >= 500:
@@ -226,36 +167,19 @@ class AbstractOauth2Authenticator(AuthBase):
226
167
  except Exception as e:
227
168
  raise Exception(f"Error while refreshing access token: {e}") from e
228
169
 
229
- def _ensure_access_token_in_response(self, response_data: Mapping[str, Any]) -> None:
170
+ def refresh_access_token(self) -> Tuple[str, Union[str, int]]:
230
171
  """
231
- Ensures that the access token is present in the response data.
232
-
233
- This method attempts to extract the access token from the provided response data.
234
- If the access token is not found, it raises an exception indicating that the token
235
- refresh API response was missing the access token. If the access token is found,
236
- it adds the token to the list of secrets to ensure it is replaced before logging
237
- the response.
238
-
239
- Args:
240
- response_data (Mapping[str, Any]): The response data from which to extract the access token.
172
+ Returns the refresh token and its expiration datetime
241
173
 
242
- Raises:
243
- Exception: If the access token is not found in the response data.
244
- ResponseKeysMaxRecurtionReached: If the maximum recursion depth is reached while extracting the access token.
174
+ :return: a tuple of (access_token, token_lifespan)
245
175
  """
246
- try:
247
- access_key = self._extract_access_token(response_data)
248
- if not access_key:
249
- raise Exception(
250
- "Token refresh API response was missing access token {self.get_access_token_name()}"
251
- )
252
- # Add the access token to the list of secrets so it is replaced before logging the response
253
- # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen...
254
- add_to_secrets(access_key)
255
- except ResponseKeysMaxRecurtionReached as e:
256
- raise e
176
+ response_json = self._get_refresh_access_token_response()
177
+
178
+ return response_json[self.get_access_token_name()], response_json[
179
+ self.get_expires_in_name()
180
+ ]
257
181
 
258
- def _parse_token_expiration_date(self, value: Union[str, int]) -> AirbyteDateTime:
182
+ def _parse_token_expiration_date(self, value: Union[str, int]) -> pendulum.DateTime:
259
183
  """
260
184
  Return the expiration datetime of the refresh token
261
185
 
@@ -267,138 +191,25 @@ class AbstractOauth2Authenticator(AuthBase):
267
191
  raise ValueError(
268
192
  f"Invalid token expiry date format {self.token_expiry_date_format}; a string representing the format is required."
269
193
  )
270
- try:
271
- return ab_datetime_parse(str(value))
272
- except ValueError as e:
273
- raise ValueError(f"Invalid token expiry date format: {e}")
194
+ return pendulum.from_format(str(value), self.token_expiry_date_format)
274
195
  else:
275
- try:
276
- # Only accept numeric values (as int/float/string) when no format specified
277
- seconds = int(float(str(value)))
278
- return ab_datetime_now() + timedelta(seconds=seconds)
279
- except (ValueError, TypeError):
280
- raise ValueError(
281
- f"Invalid expires_in value: {value}. Expected number of seconds when no format specified."
282
- )
283
-
284
- def _extract_access_token(self, response_data: Mapping[str, Any]) -> Any:
285
- """
286
- Extracts the access token from the given response data.
196
+ return pendulum.now().add(seconds=int(float(value)))
287
197
 
288
- Args:
289
- response_data (Mapping[str, Any]): The response data from which to extract the access token.
290
-
291
- Returns:
292
- str: The extracted access token.
293
- """
294
- return self._find_and_get_value_from_response(response_data, self.get_access_token_name())
295
-
296
- def _extract_refresh_token(self, response_data: Mapping[str, Any]) -> Any:
297
- """
298
- Extracts the refresh token from the given response data.
299
-
300
- Args:
301
- response_data (Mapping[str, Any]): The response data from which to extract the refresh token.
302
-
303
- Returns:
304
- str: The extracted refresh token.
305
- """
306
- return self._find_and_get_value_from_response(response_data, self.get_refresh_token_name())
307
-
308
- def _extract_token_expiry_date(self, response_data: Mapping[str, Any]) -> Any:
309
- """
310
- Extracts the token_expiry_date, like `expires_in` or `expires_at`, etc from the given response data.
311
-
312
- Args:
313
- response_data (Mapping[str, Any]): The response data from which to extract the token_expiry_date.
314
-
315
- Returns:
316
- str: The extracted token_expiry_date.
317
- """
318
- return self._find_and_get_value_from_response(response_data, self.get_expires_in_name())
319
-
320
- def _find_and_get_value_from_response(
321
- self,
322
- response_data: Mapping[str, Any],
323
- key_name: str,
324
- max_depth: int = 5,
325
- current_depth: int = 0,
326
- ) -> Any:
198
+ @property
199
+ def token_expiry_is_time_of_expiration(self) -> bool:
327
200
  """
328
- Recursively searches for a specified key in a nested dictionary or list and returns its value if found.
329
-
330
- Args:
331
- response_data (Mapping[str, Any]): The response data to search through, which can be a dictionary or a list.
332
- key_name (str): The key to search for in the response data.
333
- max_depth (int, optional): The maximum depth to search for the key to avoid infinite recursion. Defaults to 5.
334
- current_depth (int, optional): The current depth of the recursion. Defaults to 0.
335
-
336
- Returns:
337
- Any: The value associated with the specified key if found, otherwise None.
338
-
339
- Raises:
340
- AirbyteTracedException: If the maximum recursion depth is reached without finding the key.
201
+ Indicates that the Token Expiry returns the date until which the token will be valid, not the amount of time it will be valid.
341
202
  """
342
- if current_depth > max_depth:
343
- # this is needed to avoid an inf loop, possible with a very deep nesting observed.
344
- message = f"The maximum level of recursion is reached. Couldn't find the speficied `{key_name}` in the response."
345
- raise ResponseKeysMaxRecurtionReached(
346
- internal_message=message, message=message, failure_type=FailureType.config_error
347
- )
348
203
 
349
- if isinstance(response_data, dict):
350
- # get from the root level
351
- if key_name in response_data:
352
- return response_data[key_name]
353
-
354
- # get from the nested object
355
- for _, value in response_data.items():
356
- result = self._find_and_get_value_from_response(
357
- value, key_name, max_depth, current_depth + 1
358
- )
359
- if result is not None:
360
- return result
361
-
362
- # get from the nested array object
363
- elif isinstance(response_data, list):
364
- for item in response_data:
365
- result = self._find_and_get_value_from_response(
366
- item, key_name, max_depth, current_depth + 1
367
- )
368
- if result is not None:
369
- return result
370
-
371
- return None
204
+ return False
372
205
 
373
206
  @property
374
- def _message_repository(self) -> Optional[MessageRepository]:
375
- """
376
- The implementation can define a message_repository if it wants debugging logs for HTTP requests
207
+ def token_expiry_date_format(self) -> Optional[str]:
377
208
  """
378
- return _NOOP_MESSAGE_REPOSITORY
379
-
380
- def _log_response(self, response: requests.Response) -> None:
209
+ Format of the datetime; exists it if expires_in is returned as the expiration datetime instead of seconds until it expires
381
210
  """
382
- Logs the HTTP response using the message repository if it is available.
383
211
 
384
- Args:
385
- response (requests.Response): The HTTP response to log.
386
- """
387
- if self._message_repository:
388
- self._message_repository.log_message(
389
- Level.DEBUG,
390
- lambda: format_http_message(
391
- response,
392
- "Refresh token",
393
- "Obtains access token",
394
- self._NO_STREAM_NAME,
395
- is_auxiliary=True,
396
- ),
397
- )
398
-
399
- # ----------------
400
- # ABSTR METHODS
401
- # ----------------
212
+ return None
402
213
 
403
214
  @abstractmethod
404
215
  def get_token_refresh_endpoint(self) -> Optional[str]:
@@ -433,7 +244,7 @@ class AbstractOauth2Authenticator(AuthBase):
433
244
  """List of requested scopes"""
434
245
 
435
246
  @abstractmethod
436
- def get_token_expiry_date(self) -> AirbyteDateTime:
247
+ def get_token_expiry_date(self) -> pendulum.DateTime:
437
248
  """Expiration date of the access token"""
438
249
 
439
250
  @abstractmethod
@@ -473,3 +284,23 @@ class AbstractOauth2Authenticator(AuthBase):
473
284
  @abstractmethod
474
285
  def access_token(self, value: str) -> str:
475
286
  """Setter for the access token"""
287
+
288
+ @property
289
+ def _message_repository(self) -> Optional[MessageRepository]:
290
+ """
291
+ The implementation can define a message_repository if it wants debugging logs for HTTP requests
292
+ """
293
+ return _NOOP_MESSAGE_REPOSITORY
294
+
295
+ def _log_response(self, response: requests.Response) -> None:
296
+ if self._message_repository:
297
+ self._message_repository.log_message(
298
+ Level.DEBUG,
299
+ lambda: format_http_message(
300
+ response,
301
+ "Refresh token",
302
+ "Obtains access token",
303
+ self._NO_STREAM_NAME,
304
+ is_auxiliary=True,
305
+ ),
306
+ )