airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
- airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
- airbyte_cdk/sources/types.py +2 -4
- airbyte_cdk/sources/utils/transform.py +2 -23
- airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +1 -8
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
- airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
- airbyte_cdk/utils/datetime_helpers.py +0 -499
- airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -2,10 +2,10 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from datetime import timedelta
|
6
5
|
from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union
|
7
6
|
|
8
7
|
import dpath
|
8
|
+
import pendulum
|
9
9
|
|
10
10
|
from airbyte_cdk.config_observation import (
|
11
11
|
create_connector_config_control_message,
|
@@ -15,11 +15,6 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
|
15
15
|
from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import (
|
16
16
|
AbstractOauth2Authenticator,
|
17
17
|
)
|
18
|
-
from airbyte_cdk.utils.datetime_helpers import (
|
19
|
-
AirbyteDateTime,
|
20
|
-
ab_datetime_now,
|
21
|
-
ab_datetime_parse,
|
22
|
-
)
|
23
18
|
|
24
19
|
|
25
20
|
class Oauth2Authenticator(AbstractOauth2Authenticator):
|
@@ -39,7 +34,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
39
34
|
client_secret_name: str = "client_secret",
|
40
35
|
refresh_token_name: str = "refresh_token",
|
41
36
|
scopes: List[str] | None = None,
|
42
|
-
token_expiry_date:
|
37
|
+
token_expiry_date: pendulum.DateTime | None = None,
|
43
38
|
token_expiry_date_format: str | None = None,
|
44
39
|
access_token_name: str = "access_token",
|
45
40
|
expires_in_name: str = "expires_in",
|
@@ -51,7 +46,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
51
46
|
refresh_token_error_status_codes: Tuple[int, ...] = (),
|
52
47
|
refresh_token_error_key: str = "",
|
53
48
|
refresh_token_error_values: Tuple[str, ...] = (),
|
54
|
-
)
|
49
|
+
):
|
55
50
|
self._token_refresh_endpoint = token_refresh_endpoint
|
56
51
|
self._client_secret_name = client_secret_name
|
57
52
|
self._client_secret = client_secret
|
@@ -67,7 +62,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
67
62
|
self._grant_type_name = grant_type_name
|
68
63
|
self._grant_type = grant_type
|
69
64
|
|
70
|
-
self._token_expiry_date = token_expiry_date or (
|
65
|
+
self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call]
|
71
66
|
self._token_expiry_date_format = token_expiry_date_format
|
72
67
|
self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration
|
73
68
|
self._access_token = None
|
@@ -100,16 +95,16 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
100
95
|
return self._access_token_name
|
101
96
|
|
102
97
|
def get_scopes(self) -> list[str]:
|
103
|
-
return self._scopes # type: ignore[return-value]
|
98
|
+
return self._scopes # type: ignore [return-value]
|
104
99
|
|
105
100
|
def get_expires_in_name(self) -> str:
|
106
101
|
return self._expires_in_name
|
107
102
|
|
108
103
|
def get_refresh_request_body(self) -> Mapping[str, Any]:
|
109
|
-
return self._refresh_request_body # type: ignore[return-value]
|
104
|
+
return self._refresh_request_body # type: ignore [return-value]
|
110
105
|
|
111
106
|
def get_refresh_request_headers(self) -> Mapping[str, Any]:
|
112
|
-
return self._refresh_request_headers # type: ignore[return-value]
|
107
|
+
return self._refresh_request_headers # type: ignore [return-value]
|
113
108
|
|
114
109
|
def get_grant_type_name(self) -> str:
|
115
110
|
return self._grant_type_name
|
@@ -117,7 +112,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
117
112
|
def get_grant_type(self) -> str:
|
118
113
|
return self._grant_type
|
119
114
|
|
120
|
-
def get_token_expiry_date(self) ->
|
115
|
+
def get_token_expiry_date(self) -> pendulum.DateTime:
|
121
116
|
return self._token_expiry_date
|
122
117
|
|
123
118
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
@@ -133,11 +128,11 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
133
128
|
|
134
129
|
@property
|
135
130
|
def access_token(self) -> str:
|
136
|
-
return self._access_token # type: ignore[return-value]
|
131
|
+
return self._access_token # type: ignore [return-value]
|
137
132
|
|
138
133
|
@access_token.setter
|
139
134
|
def access_token(self, value: str) -> None:
|
140
|
-
self._access_token = value # type: ignore[assignment] # Incorrect type for assignment
|
135
|
+
self._access_token = value # type: ignore [assignment] # Incorrect type for assignment
|
141
136
|
|
142
137
|
|
143
138
|
class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
@@ -175,7 +170,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
175
170
|
refresh_token_error_status_codes: Tuple[int, ...] = (),
|
176
171
|
refresh_token_error_key: str = "",
|
177
172
|
refresh_token_error_values: Tuple[str, ...] = (),
|
178
|
-
)
|
173
|
+
):
|
179
174
|
"""
|
180
175
|
Args:
|
181
176
|
connector_config (Mapping[str, Any]): The full connector configuration
|
@@ -196,12 +191,18 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
196
191
|
token_expiry_is_time_of_expiration bool: set True it if expires_in is returned as time of expiration instead of the number seconds until expiration
|
197
192
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update
|
198
193
|
"""
|
199
|
-
self.
|
200
|
-
|
201
|
-
|
194
|
+
self._client_id = (
|
195
|
+
client_id # type: ignore [assignment] # Incorrect type for assignment
|
196
|
+
if client_id is not None
|
197
|
+
else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type]
|
202
198
|
)
|
203
|
-
self._client_secret
|
204
|
-
|
199
|
+
self._client_secret = (
|
200
|
+
client_secret # type: ignore [assignment] # Incorrect type for assignment
|
201
|
+
if client_secret is not None
|
202
|
+
else dpath.get(
|
203
|
+
connector_config, # type: ignore [arg-type]
|
204
|
+
("credentials", "client_secret"),
|
205
|
+
)
|
205
206
|
)
|
206
207
|
self._client_id_name = client_id_name
|
207
208
|
self._client_secret_name = client_secret_name
|
@@ -216,9 +217,9 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
216
217
|
super().__init__(
|
217
218
|
token_refresh_endpoint=token_refresh_endpoint,
|
218
219
|
client_id_name=self._client_id_name,
|
219
|
-
client_id=self.
|
220
|
+
client_id=self.get_client_id(),
|
220
221
|
client_secret_name=self._client_secret_name,
|
221
|
-
client_secret=self.
|
222
|
+
client_secret=self.get_client_secret(),
|
222
223
|
refresh_token=self.get_refresh_token(),
|
223
224
|
refresh_token_name=self._refresh_token_name,
|
224
225
|
scopes=scopes,
|
@@ -236,105 +237,76 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
236
237
|
refresh_token_error_values=refresh_token_error_values,
|
237
238
|
)
|
238
239
|
|
240
|
+
def get_refresh_token_name(self) -> str:
|
241
|
+
return self._refresh_token_name
|
242
|
+
|
243
|
+
def get_client_id(self) -> str:
|
244
|
+
return self._client_id
|
245
|
+
|
246
|
+
def get_client_secret(self) -> str:
|
247
|
+
return self._client_secret
|
248
|
+
|
239
249
|
@property
|
240
250
|
def access_token(self) -> str:
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
"""
|
247
|
-
return self._get_config_value_by_path(self._access_token_config_path) # type: ignore[return-value]
|
251
|
+
return dpath.get( # type: ignore [return-value]
|
252
|
+
self._connector_config, # type: ignore [arg-type]
|
253
|
+
self._access_token_config_path,
|
254
|
+
default="",
|
255
|
+
)
|
248
256
|
|
249
257
|
@access_token.setter
|
250
258
|
def access_token(self, new_access_token: str) -> None:
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
"""
|
257
|
-
self._set_config_value_by_path(self._access_token_config_path, new_access_token)
|
259
|
+
dpath.new(
|
260
|
+
self._connector_config, # type: ignore [arg-type]
|
261
|
+
self._access_token_config_path,
|
262
|
+
new_access_token,
|
263
|
+
)
|
258
264
|
|
259
265
|
def get_refresh_token(self) -> str:
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
Returns:
|
267
|
-
str: The refresh token as a string.
|
268
|
-
"""
|
269
|
-
return self._get_config_value_by_path(self._refresh_token_config_path) # type: ignore[return-value]
|
266
|
+
return dpath.get( # type: ignore [return-value]
|
267
|
+
self._connector_config, # type: ignore [arg-type]
|
268
|
+
self._refresh_token_config_path,
|
269
|
+
default="",
|
270
|
+
)
|
270
271
|
|
271
272
|
def set_refresh_token(self, new_refresh_token: str) -> None:
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
new_refresh_token (str): The new refresh token to be set.
|
277
|
-
"""
|
278
|
-
self._set_config_value_by_path(self._refresh_token_config_path, new_refresh_token)
|
279
|
-
|
280
|
-
def get_token_expiry_date(self) -> AirbyteDateTime:
|
281
|
-
"""
|
282
|
-
Retrieves the token expiry date from the configuration.
|
283
|
-
|
284
|
-
This method fetches the token expiry date from the configuration using the specified path.
|
285
|
-
If the expiry date is an empty string, it returns the current date and time minus one day.
|
286
|
-
Otherwise, it parses the expiry date string into an AirbyteDateTime object.
|
287
|
-
|
288
|
-
Returns:
|
289
|
-
AirbyteDateTime: The parsed or calculated token expiry date.
|
290
|
-
|
291
|
-
Raises:
|
292
|
-
TypeError: If the result is not an instance of AirbyteDateTime.
|
293
|
-
"""
|
294
|
-
expiry_date = self._get_config_value_by_path(self._token_expiry_date_config_path)
|
295
|
-
result = (
|
296
|
-
ab_datetime_now() - timedelta(days=1)
|
297
|
-
if expiry_date == ""
|
298
|
-
else ab_datetime_parse(str(expiry_date))
|
273
|
+
dpath.new(
|
274
|
+
self._connector_config, # type: ignore [arg-type]
|
275
|
+
self._refresh_token_config_path,
|
276
|
+
new_refresh_token,
|
299
277
|
)
|
300
|
-
if isinstance(result, AirbyteDateTime):
|
301
|
-
return result
|
302
|
-
raise TypeError("Invalid datetime conversion")
|
303
278
|
|
304
|
-
def
|
305
|
-
|
306
|
-
|
279
|
+
def get_token_expiry_date(self) -> pendulum.DateTime:
|
280
|
+
expiry_date = dpath.get(
|
281
|
+
self._connector_config, # type: ignore [arg-type]
|
282
|
+
self._token_expiry_date_config_path,
|
283
|
+
default="",
|
284
|
+
)
|
285
|
+
return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call]
|
307
286
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
287
|
+
def set_token_expiry_date( # type: ignore[override]
|
288
|
+
self,
|
289
|
+
new_token_expiry_date: pendulum.DateTime,
|
290
|
+
) -> None:
|
291
|
+
dpath.new(
|
292
|
+
self._connector_config, # type: ignore [arg-type]
|
293
|
+
self._token_expiry_date_config_path,
|
294
|
+
str(new_token_expiry_date),
|
313
295
|
)
|
314
296
|
|
315
297
|
def token_has_expired(self) -> bool:
|
316
298
|
"""Returns True if the token is expired"""
|
317
|
-
return
|
299
|
+
return pendulum.now("UTC") > self.get_token_expiry_date()
|
318
300
|
|
319
301
|
@staticmethod
|
320
302
|
def get_new_token_expiry_date(
|
321
303
|
access_token_expires_in: str,
|
322
304
|
token_expiry_date_format: str | None = None,
|
323
|
-
) ->
|
324
|
-
"""
|
325
|
-
Calculate the new token expiry date based on the provided expiration duration or format.
|
326
|
-
|
327
|
-
Args:
|
328
|
-
access_token_expires_in (str): The duration (in seconds) until the access token expires, or the expiry date in a specific format.
|
329
|
-
token_expiry_date_format (str | None, optional): The format of the expiry date if provided. Defaults to None.
|
330
|
-
|
331
|
-
Returns:
|
332
|
-
AirbyteDateTime: The calculated expiry date of the access token.
|
333
|
-
"""
|
305
|
+
) -> pendulum.DateTime:
|
334
306
|
if token_expiry_date_format:
|
335
|
-
return
|
307
|
+
return pendulum.from_format(access_token_expires_in, token_expiry_date_format)
|
336
308
|
else:
|
337
|
-
return
|
309
|
+
return pendulum.now("UTC").add(seconds=int(access_token_expires_in))
|
338
310
|
|
339
311
|
def get_access_token(self) -> str:
|
340
312
|
"""Retrieve new access and refresh token if the access token has expired.
|
@@ -346,88 +318,33 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
346
318
|
new_access_token, access_token_expires_in, new_refresh_token = (
|
347
319
|
self.refresh_access_token()
|
348
320
|
)
|
349
|
-
new_token_expiry_date:
|
321
|
+
new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date(
|
350
322
|
access_token_expires_in, self._token_expiry_date_format
|
351
323
|
)
|
352
324
|
self.access_token = new_access_token
|
353
325
|
self.set_refresh_token(new_refresh_token)
|
354
326
|
self.set_token_expiry_date(new_token_expiry_date)
|
355
|
-
|
327
|
+
# FIXME emit_configuration_as_airbyte_control_message as been deprecated in favor of package airbyte_cdk.sources.message
|
328
|
+
# Usually, a class shouldn't care about the implementation details but to keep backward compatibility where we print the
|
329
|
+
# message directly in the console, this is needed
|
330
|
+
if not isinstance(self._message_repository, NoopMessageRepository):
|
331
|
+
self._message_repository.emit_message(
|
332
|
+
create_connector_config_control_message(self._connector_config) # type: ignore [arg-type]
|
333
|
+
)
|
334
|
+
else:
|
335
|
+
emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type]
|
356
336
|
return self.access_token
|
357
337
|
|
358
|
-
def refresh_access_token(
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
Returns:
|
363
|
-
Tuple[str, str, str]: A tuple containing the new access token, token expiry date, and refresh token.
|
364
|
-
"""
|
365
|
-
response_json = self._make_handled_request()
|
338
|
+
def refresh_access_token( # type: ignore[override] # Signature doesn't match base class
|
339
|
+
self,
|
340
|
+
) -> Tuple[str, str, str]:
|
341
|
+
response_json = self._get_refresh_access_token_response()
|
366
342
|
return (
|
367
|
-
self.
|
368
|
-
self.
|
369
|
-
self.
|
370
|
-
)
|
371
|
-
|
372
|
-
def _set_config_value_by_path(self, config_path: Union[str, Sequence[str]], value: Any) -> None:
|
373
|
-
"""
|
374
|
-
Set a value in the connector configuration at the specified path.
|
375
|
-
|
376
|
-
Args:
|
377
|
-
config_path (Union[str, Sequence[str]]): The path within the configuration where the value should be set.
|
378
|
-
This can be a string representing a single key or a sequence of strings representing a nested path.
|
379
|
-
value (Any): The value to set at the specified path in the configuration.
|
380
|
-
|
381
|
-
Returns:
|
382
|
-
None
|
383
|
-
"""
|
384
|
-
dpath.new(self._connector_config, config_path, value) # type: ignore[arg-type]
|
385
|
-
|
386
|
-
def _get_config_value_by_path(
|
387
|
-
self, config_path: Union[str, Sequence[str]], default: Optional[str] = None
|
388
|
-
) -> str | Any:
|
389
|
-
"""
|
390
|
-
Retrieve a value from the connector configuration using a specified path.
|
391
|
-
|
392
|
-
Args:
|
393
|
-
config_path (Union[str, Sequence[str]]): The path to the desired configuration value. This can be a string or a sequence of strings.
|
394
|
-
default (Optional[str], optional): The default value to return if the specified path does not exist in the configuration. Defaults to None.
|
395
|
-
|
396
|
-
Returns:
|
397
|
-
Any: The value from the configuration at the specified path, or the default value if the path does not exist.
|
398
|
-
"""
|
399
|
-
return dpath.get(
|
400
|
-
self._connector_config, # type: ignore[arg-type]
|
401
|
-
config_path,
|
402
|
-
default=default if default is not None else "",
|
343
|
+
response_json[self.get_access_token_name()],
|
344
|
+
response_json[self.get_expires_in_name()],
|
345
|
+
response_json[self.get_refresh_token_name()],
|
403
346
|
)
|
404
347
|
|
405
|
-
def _emit_control_message(self) -> None:
|
406
|
-
"""
|
407
|
-
Emits a control message based on the connector configuration.
|
408
|
-
|
409
|
-
This method checks if the message repository is not a NoopMessageRepository.
|
410
|
-
If it is not, it emits a message using the message repository. Otherwise,
|
411
|
-
it falls back to emitting the configuration as an Airbyte control message
|
412
|
-
directly to the console for backward compatibility.
|
413
|
-
|
414
|
-
Note:
|
415
|
-
The function `emit_configuration_as_airbyte_control_message` has been deprecated
|
416
|
-
in favor of the package `airbyte_cdk.sources.message`.
|
417
|
-
|
418
|
-
Raises:
|
419
|
-
TypeError: If the argument types are incorrect.
|
420
|
-
"""
|
421
|
-
# FIXME emit_configuration_as_airbyte_control_message as been deprecated in favor of package airbyte_cdk.sources.message
|
422
|
-
# Usually, a class shouldn't care about the implementation details but to keep backward compatibility where we print the
|
423
|
-
# message directly in the console, this is needed
|
424
|
-
if not isinstance(self._message_repository, NoopMessageRepository):
|
425
|
-
self._message_repository.emit_message(
|
426
|
-
create_connector_config_control_message(self._connector_config) # type: ignore[arg-type]
|
427
|
-
)
|
428
|
-
else:
|
429
|
-
emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore[arg-type]
|
430
|
-
|
431
348
|
@property
|
432
349
|
def _message_repository(self) -> MessageRepository:
|
433
350
|
"""
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
-
|
9
|
+
import orjson
|
10
10
|
|
11
11
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
12
12
|
# "hello"}] returns "hello"
|
@@ -151,9 +151,7 @@ class StreamSlice(Mapping[str, Any]):
|
|
151
151
|
return self._stream_slice
|
152
152
|
|
153
153
|
def __hash__(self) -> int:
|
154
|
-
return
|
155
|
-
stream_slice=self._stream_slice
|
156
|
-
) # no need to provide stream_name here as this is used for slicing the cursor
|
154
|
+
return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
|
157
155
|
|
158
156
|
def __bool__(self) -> bool:
|
159
157
|
return bool(self._stream_slice) or bool(self._extra_fields)
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
+
from distutils.util import strtobool
|
6
7
|
from enum import Flag, auto
|
7
8
|
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
|
8
9
|
|
@@ -21,28 +22,6 @@ python_to_json = {v: k for k, v in json_to_python.items()}
|
|
21
22
|
|
22
23
|
logger = logging.getLogger("airbyte")
|
23
24
|
|
24
|
-
_TRUTHY_STRINGS = ("y", "yes", "t", "true", "on", "1")
|
25
|
-
_FALSEY_STRINGS = ("n", "no", "f", "false", "off", "0")
|
26
|
-
|
27
|
-
|
28
|
-
def _strtobool(value: str, /) -> int:
|
29
|
-
"""Mimic the behavior of distutils.util.strtobool.
|
30
|
-
|
31
|
-
From: https://docs.python.org/2/distutils/apiref.html#distutils.util.strtobool
|
32
|
-
|
33
|
-
> Convert a string representation of truth to true (1) or false (0).
|
34
|
-
> True values are y, yes, t, true, on and 1; false values are n, no, f, false, off and 0. Raises
|
35
|
-
> `ValueError` if val is anything else.
|
36
|
-
"""
|
37
|
-
normalized_str = value.lower().strip()
|
38
|
-
if normalized_str in _TRUTHY_STRINGS:
|
39
|
-
return 1
|
40
|
-
|
41
|
-
if normalized_str in _FALSEY_STRINGS:
|
42
|
-
return 0
|
43
|
-
|
44
|
-
raise ValueError(f"Invalid boolean value: {normalized_str}")
|
45
|
-
|
46
25
|
|
47
26
|
class TransformConfig(Flag):
|
48
27
|
"""
|
@@ -150,7 +129,7 @@ class TypeTransformer:
|
|
150
129
|
return int(original_item)
|
151
130
|
elif target_type == "boolean":
|
152
131
|
if isinstance(original_item, str):
|
153
|
-
return
|
132
|
+
return strtobool(original_item) == 1
|
154
133
|
return bool(original_item)
|
155
134
|
elif target_type == "array":
|
156
135
|
item_types = set(subschema.get("items", {}).get("type", set()))
|
@@ -4,6 +4,7 @@
|
|
4
4
|
import importlib.util
|
5
5
|
from pathlib import Path
|
6
6
|
from types import ModuleType
|
7
|
+
from typing import Optional
|
7
8
|
|
8
9
|
import pytest
|
9
10
|
|
@@ -29,7 +30,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:
|
|
29
30
|
|
30
31
|
|
31
32
|
@pytest.fixture(scope="session")
|
32
|
-
def components_module(connector_dir: Path) -> ModuleType
|
33
|
+
def components_module(connector_dir: Path) -> Optional[ModuleType]:
|
33
34
|
"""Load and return the components module from the connector directory.
|
34
35
|
|
35
36
|
This assumes the components module is located at <connector_dir>/components.py.
|
@@ -3,43 +3,102 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
|
6
|
-
|
6
|
+
import copy
|
7
|
+
from typing import Any, Dict, List, Mapping, Optional, Union
|
8
|
+
|
9
|
+
|
10
|
+
def _merge_mappings(
|
11
|
+
target: Dict[str, Any],
|
12
|
+
source: Mapping[str, Any],
|
13
|
+
path: Optional[List[str]] = None,
|
14
|
+
allow_same_value_merge: bool = False,
|
15
|
+
) -> None:
|
16
|
+
"""
|
17
|
+
Recursively merge two dictionaries, raising an error if there are any conflicts.
|
18
|
+
For body_json requests (allow_same_value_merge=True), a conflict occurs only when the same path has different values.
|
19
|
+
For other request types (allow_same_value_merge=False), any duplicate key is a conflict, regardless of value.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
target: The dictionary to merge into
|
23
|
+
source: The dictionary to merge from
|
24
|
+
path: The current path in the nested structure (for error messages)
|
25
|
+
allow_same_value_merge: Whether to allow merging the same value into the same key. Set to false by default, should only be true for body_json injections
|
26
|
+
"""
|
27
|
+
path = path or []
|
28
|
+
for key, source_value in source.items():
|
29
|
+
current_path = path + [str(key)]
|
30
|
+
|
31
|
+
if key in target:
|
32
|
+
target_value = target[key]
|
33
|
+
if isinstance(target_value, dict) and isinstance(source_value, dict):
|
34
|
+
# Only body_json supports nested_structures
|
35
|
+
if not allow_same_value_merge:
|
36
|
+
raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
|
37
|
+
# If both are dictionaries, recursively merge them
|
38
|
+
_merge_mappings(target_value, source_value, current_path, allow_same_value_merge)
|
39
|
+
|
40
|
+
elif not allow_same_value_merge or target_value != source_value:
|
41
|
+
# If same key has different values, that's a conflict
|
42
|
+
raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
|
43
|
+
else:
|
44
|
+
# No conflict, just copy the value (using deepcopy for nested structures)
|
45
|
+
target[key] = copy.deepcopy(source_value)
|
7
46
|
|
8
47
|
|
9
48
|
def combine_mappings(
|
10
49
|
mappings: List[Optional[Union[Mapping[str, Any], str]]],
|
50
|
+
allow_same_value_merge: bool = False,
|
11
51
|
) -> Union[Mapping[str, Any], str]:
|
12
52
|
"""
|
13
|
-
Combine multiple mappings into a single mapping.
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
53
|
+
Combine multiple mappings into a single mapping.
|
54
|
+
|
55
|
+
For body_json requests (allow_same_value_merge=True):
|
56
|
+
- Supports nested structures (e.g., {"data": {"user": {"id": 1}}})
|
57
|
+
- Allows duplicate keys if their values match
|
58
|
+
- Raises error if same path has different values
|
59
|
+
|
60
|
+
For other request types (allow_same_value_merge=False):
|
61
|
+
- Only supports flat structures
|
62
|
+
- Any duplicate key raises an error, regardless of value
|
63
|
+
|
64
|
+
Args:
|
65
|
+
mappings: List of mappings to combine
|
66
|
+
allow_same_value_merge: Whether to allow duplicate keys with matching values.
|
67
|
+
Should only be True for body_json requests.
|
68
|
+
|
69
|
+
Returns:
|
70
|
+
A single mapping combining all inputs, or a string if there is exactly one
|
71
|
+
string mapping and no other non-empty mappings.
|
72
|
+
|
73
|
+
Raises:
|
74
|
+
ValueError: If there are:
|
75
|
+
- Multiple string mappings
|
76
|
+
- Both a string mapping and non-empty dictionary mappings
|
77
|
+
- Conflicting keys/paths based on allow_same_value_merge setting
|
18
78
|
"""
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
all_keys.append(keys)
|
25
|
-
|
26
|
-
string_options = sum(isinstance(mapping, str) for mapping in mappings)
|
27
|
-
# If more than one mapping is a string, raise a ValueError
|
79
|
+
if not mappings:
|
80
|
+
return {}
|
81
|
+
|
82
|
+
# Count how many string options we have, ignoring None values
|
83
|
+
string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None)
|
28
84
|
if string_options > 1:
|
29
85
|
raise ValueError("Cannot combine multiple string options")
|
30
86
|
|
31
|
-
|
32
|
-
|
87
|
+
# Filter out None values and empty mappings
|
88
|
+
non_empty_mappings = [
|
89
|
+
m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m)
|
90
|
+
]
|
33
91
|
|
34
|
-
# If
|
35
|
-
|
36
|
-
if
|
37
|
-
|
92
|
+
# If there is only one string option and no other non-empty mappings, return it
|
93
|
+
if string_options == 1:
|
94
|
+
if len(non_empty_mappings) > 1:
|
95
|
+
raise ValueError("Cannot combine multiple options if one is a string")
|
96
|
+
return next(m for m in non_empty_mappings if isinstance(m, str))
|
38
97
|
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
98
|
+
# Start with an empty result and merge each mapping into it
|
99
|
+
result: Dict[str, Any] = {}
|
100
|
+
for mapping in non_empty_mappings:
|
101
|
+
if mapping and isinstance(mapping, Mapping):
|
102
|
+
_merge_mappings(result, mapping, allow_same_value_merge=allow_same_value_merge)
|
43
103
|
|
44
|
-
|
45
|
-
return {key: value for mapping in mappings if mapping for key, value in mapping.items()} # type: ignore # mapping can't be string here
|
104
|
+
return result
|
@@ -16,14 +16,7 @@ class SliceHasher:
|
|
16
16
|
_ENCODING: Final = "utf-8"
|
17
17
|
|
18
18
|
@classmethod
|
19
|
-
def hash(
|
20
|
-
cls,
|
21
|
-
stream_name: str = "<stream name not provided>",
|
22
|
-
stream_slice: Optional[Mapping[str, Any]] = None,
|
23
|
-
) -> int:
|
24
|
-
"""
|
25
|
-
Note that streams partition with the same slicing value but with different names might collapse if stream name is not provided
|
26
|
-
"""
|
19
|
+
def hash(cls, stream_name: str, stream_slice: Optional[Mapping[str, Any]] = None) -> int:
|
27
20
|
if stream_slice:
|
28
21
|
try:
|
29
22
|
s = json.dumps(stream_slice, sort_keys=True, cls=SliceEncoder)
|