airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
  11. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  14. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  16. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  22. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
  24. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  25. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  26. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  27. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  28. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  29. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  30. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
  31. airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
  32. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
  33. airbyte_cdk/sources/http_logger.py +1 -1
  34. airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
  38. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
  39. airbyte_cdk/sources/types.py +2 -4
  40. airbyte_cdk/sources/utils/transform.py +2 -23
  41. airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
  42. airbyte_cdk/utils/mapping_helpers.py +86 -27
  43. airbyte_cdk/utils/slice_hasher.py +1 -8
  44. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
  45. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
  46. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
  47. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
  48. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
  49. airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
  50. airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
  51. airbyte_cdk/utils/datetime_helpers.py +0 -499
  52. airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
  53. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
  54. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py

@@ -2,10 +2,10 @@
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #

- from datetime import timedelta
  from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union

  import dpath
+ import pendulum

  from airbyte_cdk.config_observation import (
  create_connector_config_control_message,
@@ -15,11 +15,6 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
  from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import (
  AbstractOauth2Authenticator,
  )
- from airbyte_cdk.utils.datetime_helpers import (
- AirbyteDateTime,
- ab_datetime_now,
- ab_datetime_parse,
- )


  class Oauth2Authenticator(AbstractOauth2Authenticator):
@@ -39,7 +34,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
  client_secret_name: str = "client_secret",
  refresh_token_name: str = "refresh_token",
  scopes: List[str] | None = None,
- token_expiry_date: AirbyteDateTime | None = None,
+ token_expiry_date: pendulum.DateTime | None = None,
  token_expiry_date_format: str | None = None,
  access_token_name: str = "access_token",
  expires_in_name: str = "expires_in",
@@ -51,7 +46,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
  refresh_token_error_status_codes: Tuple[int, ...] = (),
  refresh_token_error_key: str = "",
  refresh_token_error_values: Tuple[str, ...] = (),
- ) -> None:
+ ):
  self._token_refresh_endpoint = token_refresh_endpoint
  self._client_secret_name = client_secret_name
  self._client_secret = client_secret
@@ -67,7 +62,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
  self._grant_type_name = grant_type_name
  self._grant_type = grant_type

- self._token_expiry_date = token_expiry_date or (ab_datetime_now() - timedelta(days=1))
+ self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call]
  self._token_expiry_date_format = token_expiry_date_format
  self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration
  self._access_token = None
@@ -100,16 +95,16 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
  return self._access_token_name

  def get_scopes(self) -> list[str]:
- return self._scopes # type: ignore[return-value]
+ return self._scopes # type: ignore [return-value]

  def get_expires_in_name(self) -> str:
  return self._expires_in_name

  def get_refresh_request_body(self) -> Mapping[str, Any]:
- return self._refresh_request_body # type: ignore[return-value]
+ return self._refresh_request_body # type: ignore [return-value]

  def get_refresh_request_headers(self) -> Mapping[str, Any]:
- return self._refresh_request_headers # type: ignore[return-value]
+ return self._refresh_request_headers # type: ignore [return-value]

  def get_grant_type_name(self) -> str:
  return self._grant_type_name
@@ -117,7 +112,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
  def get_grant_type(self) -> str:
  return self._grant_type

- def get_token_expiry_date(self) -> AirbyteDateTime:
+ def get_token_expiry_date(self) -> pendulum.DateTime:
  return self._token_expiry_date

  def set_token_expiry_date(self, value: Union[str, int]) -> None:
@@ -133,11 +128,11 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):

  @property
  def access_token(self) -> str:
- return self._access_token # type: ignore[return-value]
+ return self._access_token # type: ignore [return-value]

  @access_token.setter
  def access_token(self, value: str) -> None:
- self._access_token = value # type: ignore[assignment] # Incorrect type for assignment
+ self._access_token = value # type: ignore [assignment] # Incorrect type for assignment


  class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
@@ -175,7 +170,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
  refresh_token_error_status_codes: Tuple[int, ...] = (),
  refresh_token_error_key: str = "",
  refresh_token_error_values: Tuple[str, ...] = (),
- ) -> None:
+ ):
  """
  Args:
  connector_config (Mapping[str, Any]): The full connector configuration
@@ -196,12 +191,18 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
  token_expiry_is_time_of_expiration bool: set True it if expires_in is returned as time of expiration instead of the number seconds until expiration
  message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update
  """
- self._connector_config = connector_config
- self._client_id: str = self._get_config_value_by_path(
- ("credentials", "client_id"), client_id
+ self._client_id = (
+ client_id # type: ignore [assignment] # Incorrect type for assignment
+ if client_id is not None
+ else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type]
  )
- self._client_secret: str = self._get_config_value_by_path(
- ("credentials", "client_secret"), client_secret
+ self._client_secret = (
+ client_secret # type: ignore [assignment] # Incorrect type for assignment
+ if client_secret is not None
+ else dpath.get(
+ connector_config, # type: ignore [arg-type]
+ ("credentials", "client_secret"),
+ )
  )
  self._client_id_name = client_id_name
  self._client_secret_name = client_secret_name
@@ -216,9 +217,9 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
  super().__init__(
  token_refresh_endpoint=token_refresh_endpoint,
  client_id_name=self._client_id_name,
- client_id=self._client_id,
+ client_id=self.get_client_id(),
  client_secret_name=self._client_secret_name,
- client_secret=self._client_secret,
+ client_secret=self.get_client_secret(),
  refresh_token=self.get_refresh_token(),
  refresh_token_name=self._refresh_token_name,
  scopes=scopes,
@@ -236,105 +237,76 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
  refresh_token_error_values=refresh_token_error_values,
  )

+ def get_refresh_token_name(self) -> str:
+ return self._refresh_token_name
+
+ def get_client_id(self) -> str:
+ return self._client_id
+
+ def get_client_secret(self) -> str:
+ return self._client_secret
+
  @property
  def access_token(self) -> str:
- """
- Retrieve the access token from the configuration.
-
- Returns:
- str: The access token.
- """
- return self._get_config_value_by_path(self._access_token_config_path) # type: ignore[return-value]
+ return dpath.get( # type: ignore [return-value]
+ self._connector_config, # type: ignore [arg-type]
+ self._access_token_config_path,
+ default="",
+ )

  @access_token.setter
  def access_token(self, new_access_token: str) -> None:
- """
- Sets a new access token.
-
- Args:
- new_access_token (str): The new access token to be set.
- """
- self._set_config_value_by_path(self._access_token_config_path, new_access_token)
+ dpath.new(
+ self._connector_config, # type: ignore [arg-type]
+ self._access_token_config_path,
+ new_access_token,
+ )

  def get_refresh_token(self) -> str:
- """
- Retrieve the refresh token from the configuration.
-
- This method fetches the refresh token using the configuration path specified
- by `_refresh_token_config_path`.
-
- Returns:
- str: The refresh token as a string.
- """
- return self._get_config_value_by_path(self._refresh_token_config_path) # type: ignore[return-value]
+ return dpath.get( # type: ignore [return-value]
+ self._connector_config, # type: ignore [arg-type]
+ self._refresh_token_config_path,
+ default="",
+ )

  def set_refresh_token(self, new_refresh_token: str) -> None:
- """
- Updates the refresh token in the configuration.
-
- Args:
- new_refresh_token (str): The new refresh token to be set.
- """
- self._set_config_value_by_path(self._refresh_token_config_path, new_refresh_token)
-
- def get_token_expiry_date(self) -> AirbyteDateTime:
- """
- Retrieves the token expiry date from the configuration.
-
- This method fetches the token expiry date from the configuration using the specified path.
- If the expiry date is an empty string, it returns the current date and time minus one day.
- Otherwise, it parses the expiry date string into an AirbyteDateTime object.
-
- Returns:
- AirbyteDateTime: The parsed or calculated token expiry date.
-
- Raises:
- TypeError: If the result is not an instance of AirbyteDateTime.
- """
- expiry_date = self._get_config_value_by_path(self._token_expiry_date_config_path)
- result = (
- ab_datetime_now() - timedelta(days=1)
- if expiry_date == ""
- else ab_datetime_parse(str(expiry_date))
+ dpath.new(
+ self._connector_config, # type: ignore [arg-type]
+ self._refresh_token_config_path,
+ new_refresh_token,
  )
- if isinstance(result, AirbyteDateTime):
- return result
- raise TypeError("Invalid datetime conversion")

- def set_token_expiry_date(self, new_token_expiry_date: AirbyteDateTime) -> None: # type: ignore[override]
- """
- Sets the token expiry date in the configuration.
+ def get_token_expiry_date(self) -> pendulum.DateTime:
+ expiry_date = dpath.get(
+ self._connector_config, # type: ignore [arg-type]
+ self._token_expiry_date_config_path,
+ default="",
+ )
+ return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call]

- Args:
- new_token_expiry_date (AirbyteDateTime): The new expiry date for the token.
- """
- self._set_config_value_by_path(
- self._token_expiry_date_config_path, str(new_token_expiry_date)
+ def set_token_expiry_date( # type: ignore[override]
+ self,
+ new_token_expiry_date: pendulum.DateTime,
+ ) -> None:
+ dpath.new(
+ self._connector_config, # type: ignore [arg-type]
+ self._token_expiry_date_config_path,
+ str(new_token_expiry_date),
  )

  def token_has_expired(self) -> bool:
  """Returns True if the token is expired"""
- return ab_datetime_now() > self.get_token_expiry_date()
+ return pendulum.now("UTC") > self.get_token_expiry_date()

  @staticmethod
  def get_new_token_expiry_date(
  access_token_expires_in: str,
  token_expiry_date_format: str | None = None,
- ) -> AirbyteDateTime:
- """
- Calculate the new token expiry date based on the provided expiration duration or format.
-
- Args:
- access_token_expires_in (str): The duration (in seconds) until the access token expires, or the expiry date in a specific format.
- token_expiry_date_format (str | None, optional): The format of the expiry date if provided. Defaults to None.
-
- Returns:
- AirbyteDateTime: The calculated expiry date of the access token.
- """
+ ) -> pendulum.DateTime:
  if token_expiry_date_format:
- return ab_datetime_parse(access_token_expires_in)
+ return pendulum.from_format(access_token_expires_in, token_expiry_date_format)
  else:
- return ab_datetime_now() + timedelta(seconds=int(access_token_expires_in))
+ return pendulum.now("UTC").add(seconds=int(access_token_expires_in))

  def get_access_token(self) -> str:
  """Retrieve new access and refresh token if the access token has expired.
@@ -346,88 +318,33 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
  new_access_token, access_token_expires_in, new_refresh_token = (
  self.refresh_access_token()
  )
- new_token_expiry_date: AirbyteDateTime = self.get_new_token_expiry_date(
+ new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date(
  access_token_expires_in, self._token_expiry_date_format
  )
  self.access_token = new_access_token
  self.set_refresh_token(new_refresh_token)
  self.set_token_expiry_date(new_token_expiry_date)
- self._emit_control_message()
+ # FIXME emit_configuration_as_airbyte_control_message as been deprecated in favor of package airbyte_cdk.sources.message
+ # Usually, a class shouldn't care about the implementation details but to keep backward compatibility where we print the
+ # message directly in the console, this is needed
+ if not isinstance(self._message_repository, NoopMessageRepository):
+ self._message_repository.emit_message(
+ create_connector_config_control_message(self._connector_config) # type: ignore [arg-type]
+ )
+ else:
+ emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type]
  return self.access_token

- def refresh_access_token(self) -> Tuple[str, str, str]: # type: ignore[override]
- """
- Refreshes the access token by making a handled request and extracting the necessary token information.
-
- Returns:
- Tuple[str, str, str]: A tuple containing the new access token, token expiry date, and refresh token.
- """
- response_json = self._make_handled_request()
+ def refresh_access_token( # type: ignore[override] # Signature doesn't match base class
+ self,
+ ) -> Tuple[str, str, str]:
+ response_json = self._get_refresh_access_token_response()
  return (
- self._extract_access_token(response_json),
- self._extract_token_expiry_date(response_json),
- self._extract_refresh_token(response_json),
- )
-
- def _set_config_value_by_path(self, config_path: Union[str, Sequence[str]], value: Any) -> None:
- """
- Set a value in the connector configuration at the specified path.
-
- Args:
- config_path (Union[str, Sequence[str]]): The path within the configuration where the value should be set.
- This can be a string representing a single key or a sequence of strings representing a nested path.
- value (Any): The value to set at the specified path in the configuration.
-
- Returns:
- None
- """
- dpath.new(self._connector_config, config_path, value) # type: ignore[arg-type]
-
- def _get_config_value_by_path(
- self, config_path: Union[str, Sequence[str]], default: Optional[str] = None
- ) -> str | Any:
- """
- Retrieve a value from the connector configuration using a specified path.
-
- Args:
- config_path (Union[str, Sequence[str]]): The path to the desired configuration value. This can be a string or a sequence of strings.
- default (Optional[str], optional): The default value to return if the specified path does not exist in the configuration. Defaults to None.
-
- Returns:
- Any: The value from the configuration at the specified path, or the default value if the path does not exist.
- """
- return dpath.get(
- self._connector_config, # type: ignore[arg-type]
- config_path,
- default=default if default is not None else "",
+ response_json[self.get_access_token_name()],
+ response_json[self.get_expires_in_name()],
+ response_json[self.get_refresh_token_name()],
  )

- def _emit_control_message(self) -> None:
- """
- Emits a control message based on the connector configuration.
-
- This method checks if the message repository is not a NoopMessageRepository.
- If it is not, it emits a message using the message repository. Otherwise,
- it falls back to emitting the configuration as an Airbyte control message
- directly to the console for backward compatibility.
-
- Note:
- The function `emit_configuration_as_airbyte_control_message` has been deprecated
- in favor of the package `airbyte_cdk.sources.message`.
-
- Raises:
- TypeError: If the argument types are incorrect.
- """
- # FIXME emit_configuration_as_airbyte_control_message as been deprecated in favor of package airbyte_cdk.sources.message
- # Usually, a class shouldn't care about the implementation details but to keep backward compatibility where we print the
- # message directly in the console, this is needed
- if not isinstance(self._message_repository, NoopMessageRepository):
- self._message_repository.emit_message(
- create_connector_config_control_message(self._connector_config) # type: ignore[arg-type]
- )
- else:
- emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore[arg-type]
-
  @property
  def _message_repository(self) -> MessageRepository:
  """
airbyte_cdk/sources/types.py

@@ -6,7 +6,7 @@ from __future__ import annotations

  from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView

- from airbyte_cdk.utils.slice_hasher import SliceHasher
+ import orjson

  # A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
  # "hello"}] returns "hello"
@@ -151,9 +151,7 @@ class StreamSlice(Mapping[str, Any]):
  return self._stream_slice

  def __hash__(self) -> int:
- return SliceHasher.hash(
- stream_slice=self._stream_slice
- ) # no need to provide stream_name here as this is used for slicing the cursor
+ return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))

  def __bool__(self) -> bool:
  return bool(self._stream_slice) or bool(self._extra_fields)
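
As an illustration of the StreamSlice.__hash__ change above (not part of the diff), the "+" side derives the hash from an orjson dump with sorted keys, so key order does not affect the result:

    # Illustrative only; assumes orjson is installed.
    import orjson

    def slice_hash(stream_slice: dict) -> int:
        return hash(orjson.dumps(stream_slice, option=orjson.OPT_SORT_KEYS))

    assert slice_hash({"start": "2024-01-01", "end": "2024-01-31"}) == slice_hash(
        {"end": "2024-01-31", "start": "2024-01-01"}
    )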
airbyte_cdk/sources/utils/transform.py

@@ -3,6 +3,7 @@
  #

  import logging
+ from distutils.util import strtobool
  from enum import Flag, auto
  from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast

@@ -21,28 +22,6 @@ python_to_json = {v: k for k, v in json_to_python.items()}

  logger = logging.getLogger("airbyte")

- _TRUTHY_STRINGS = ("y", "yes", "t", "true", "on", "1")
- _FALSEY_STRINGS = ("n", "no", "f", "false", "off", "0")
-
-
- def _strtobool(value: str, /) -> int:
- """Mimic the behavior of distutils.util.strtobool.
-
- From: https://docs.python.org/2/distutils/apiref.html#distutils.util.strtobool
-
- > Convert a string representation of truth to true (1) or false (0).
- > True values are y, yes, t, true, on and 1; false values are n, no, f, false, off and 0. Raises
- > `ValueError` if val is anything else.
- """
- normalized_str = value.lower().strip()
- if normalized_str in _TRUTHY_STRINGS:
- return 1
-
- if normalized_str in _FALSEY_STRINGS:
- return 0
-
- raise ValueError(f"Invalid boolean value: {normalized_str}")
-

  class TransformConfig(Flag):
  """
@@ -150,7 +129,7 @@ class TypeTransformer:
  return int(original_item)
  elif target_type == "boolean":
  if isinstance(original_item, str):
- return _strtobool(original_item) == 1
+ return strtobool(original_item) == 1
  return bool(original_item)
  elif target_type == "array":
  item_types = set(subschema.get("items", {}).get("type", set()))
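
For context on the boolean coercion change above (not part of the diff): the "+" side goes back to distutils.util.strtobool, which accepts the same truthy/falsey strings the removed _strtobool helper listed. Note that distutils is deprecated since Python 3.10 and removed in 3.12:

    # Illustrative only; requires a Python version that still ships distutils (< 3.12).
    from distutils.util import strtobool

    assert strtobool("yes") == 1   # truthy: y, yes, t, true, on, 1
    assert strtobool("off") == 0   # falsey: n, no, f, false, off, 0
    # strtobool("maybe") raises ValueError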
airbyte_cdk/test/utils/manifest_only_fixtures.py

@@ -4,6 +4,7 @@
  import importlib.util
  from pathlib import Path
  from types import ModuleType
+ from typing import Optional

  import pytest

@@ -29,7 +30,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:


  @pytest.fixture(scope="session")
- def components_module(connector_dir: Path) -> ModuleType | None:
+ def components_module(connector_dir: Path) -> Optional[ModuleType]:
  """Load and return the components module from the connector directory.

  This assumes the components module is located at <connector_dir>/components.py.
airbyte_cdk/utils/mapping_helpers.py

@@ -3,43 +3,102 @@
  #


- from typing import Any, List, Mapping, Optional, Set, Union
+ import copy
+ from typing import Any, Dict, List, Mapping, Optional, Union
+
+
+ def _merge_mappings(
+ target: Dict[str, Any],
+ source: Mapping[str, Any],
+ path: Optional[List[str]] = None,
+ allow_same_value_merge: bool = False,
+ ) -> None:
+ """
+ Recursively merge two dictionaries, raising an error if there are any conflicts.
+ For body_json requests (allow_same_value_merge=True), a conflict occurs only when the same path has different values.
+ For other request types (allow_same_value_merge=False), any duplicate key is a conflict, regardless of value.
+
+ Args:
+ target: The dictionary to merge into
+ source: The dictionary to merge from
+ path: The current path in the nested structure (for error messages)
+ allow_same_value_merge: Whether to allow merging the same value into the same key. Set to false by default, should only be true for body_json injections
+ """
+ path = path or []
+ for key, source_value in source.items():
+ current_path = path + [str(key)]
+
+ if key in target:
+ target_value = target[key]
+ if isinstance(target_value, dict) and isinstance(source_value, dict):
+ # Only body_json supports nested_structures
+ if not allow_same_value_merge:
+ raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
+ # If both are dictionaries, recursively merge them
+ _merge_mappings(target_value, source_value, current_path, allow_same_value_merge)
+
+ elif not allow_same_value_merge or target_value != source_value:
+ # If same key has different values, that's a conflict
+ raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
+ else:
+ # No conflict, just copy the value (using deepcopy for nested structures)
+ target[key] = copy.deepcopy(source_value)


  def combine_mappings(
  mappings: List[Optional[Union[Mapping[str, Any], str]]],
+ allow_same_value_merge: bool = False,
  ) -> Union[Mapping[str, Any], str]:
  """
- Combine multiple mappings into a single mapping. If any of the mappings are a string, return
- that string. Raise errors in the following cases:
- * If there are duplicate keys across mappings
- * If there are multiple string mappings
- * If there are multiple mappings containing keys and one of them is a string
+ Combine multiple mappings into a single mapping.
+
+ For body_json requests (allow_same_value_merge=True):
+ - Supports nested structures (e.g., {"data": {"user": {"id": 1}}})
+ - Allows duplicate keys if their values match
+ - Raises error if same path has different values
+
+ For other request types (allow_same_value_merge=False):
+ - Only supports flat structures
+ - Any duplicate key raises an error, regardless of value
+
+ Args:
+ mappings: List of mappings to combine
+ allow_same_value_merge: Whether to allow duplicate keys with matching values.
+ Should only be True for body_json requests.
+
+ Returns:
+ A single mapping combining all inputs, or a string if there is exactly one
+ string mapping and no other non-empty mappings.
+
+ Raises:
+ ValueError: If there are:
+ - Multiple string mappings
+ - Both a string mapping and non-empty dictionary mappings
+ - Conflicting keys/paths based on allow_same_value_merge setting
  """
- all_keys: List[Set[str]] = []
- for part in mappings:
- if part is None:
- continue
- keys = set(part.keys()) if not isinstance(part, str) else set()
- all_keys.append(keys)
-
- string_options = sum(isinstance(mapping, str) for mapping in mappings)
- # If more than one mapping is a string, raise a ValueError
+ if not mappings:
+ return {}
+
+ # Count how many string options we have, ignoring None values
+ string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None)
  if string_options > 1:
  raise ValueError("Cannot combine multiple string options")

- if string_options == 1 and sum(len(keys) for keys in all_keys) > 0:
- raise ValueError("Cannot combine multiple options if one is a string")
+ # Filter out None values and empty mappings
+ non_empty_mappings = [
+ m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m)
+ ]

- # If any mapping is a string, return it
- for mapping in mappings:
- if isinstance(mapping, str):
- return mapping
+ # If there is only one string option and no other non-empty mappings, return it
+ if string_options == 1:
+ if len(non_empty_mappings) > 1:
+ raise ValueError("Cannot combine multiple options if one is a string")
+ return next(m for m in non_empty_mappings if isinstance(m, str))

- # If there are duplicate keys across mappings, raise a ValueError
- intersection = set().union(*all_keys)
- if len(intersection) < sum(len(keys) for keys in all_keys):
- raise ValueError(f"Duplicate keys found: {intersection}")
+ # Start with an empty result and merge each mapping into it
+ result: Dict[str, Any] = {}
+ for mapping in non_empty_mappings:
+ if mapping and isinstance(mapping, Mapping):
+ _merge_mappings(result, mapping, allow_same_value_merge=allow_same_value_merge)

- # Return the combined mappings
- return {key: value for mapping in mappings if mapping for key, value in mapping.items()} # type: ignore # mapping can't be string here
+ return result
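
To make the new merge semantics above concrete, a short usage sketch (not part of the diff, following the behavior of the "+" side of airbyte_cdk/utils/mapping_helpers.py):

    # Illustrative only; behavior follows the "+" side of combine_mappings above.
    from airbyte_cdk.utils.mapping_helpers import combine_mappings

    # Default (flat request options): distinct keys merge, duplicate keys always conflict.
    assert combine_mappings([{"page": 1}, {"limit": 50}]) == {"page": 1, "limit": 50}

    # body_json style (allow_same_value_merge=True): nested dictionaries are merged.
    merged = combine_mappings(
        [{"data": {"user": {"id": 1}}}, {"data": {"user": {"name": "jo"}}}],
        allow_same_value_merge=True,
    )
    assert merged == {"data": {"user": {"id": 1, "name": "jo"}}}

    # Conflicting values on the same path still raise ValueError("Duplicate keys found: ...").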
airbyte_cdk/utils/slice_hasher.py

@@ -16,14 +16,7 @@ class SliceHasher:
  _ENCODING: Final = "utf-8"

  @classmethod
- def hash(
- cls,
- stream_name: str = "<stream name not provided>",
- stream_slice: Optional[Mapping[str, Any]] = None,
- ) -> int:
- """
- Note that streams partition with the same slicing value but with different names might collapse if stream name is not provided
- """
+ def hash(cls, stream_name: str, stream_slice: Optional[Mapping[str, Any]] = None) -> int:
  if stream_slice:
  try:
  s = json.dumps(stream_slice, sort_keys=True, cls=SliceEncoder)