airbyte-cdk 6.8.1rc8__py3-none-any.whl → 6.8.1rc10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
- airbyte_cdk/entrypoint.py +7 -6
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +9 -4
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -42
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +116 -19
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +53 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +95 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +6 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +100 -27
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +2 -1
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/resolvers/__init__.py +13 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +106 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/embedded/base_integration.py +3 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +17 -14
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +4 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +34 -15
- airbyte_cdk/test/entrypoint_wrapper.py +11 -6
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +4 -3
- airbyte_cdk/utils/spec_schema_transformations.py +3 -2
- airbyte_cdk/utils/traced_exception.py +14 -12
- airbyte_cdk-6.8.1rc10.dist-info/METADATA +111 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/RECORD +73 -70
- airbyte_cdk-6.8.1rc8.dist-info/METADATA +0 -307
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/entry_points.txt +0 -0
@@ -54,7 +54,6 @@ from airbyte_cdk.utils.stream_status_utils import (
|
|
54
54
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
55
55
|
|
56
56
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
57
|
-
logger = logging.getLogger("airbyte")
|
58
57
|
|
59
58
|
|
60
59
|
class MessageRepresentationAirbyteTracedErrors(AirbyteTracedException):
|
@@ -95,7 +94,6 @@ class HttpClient:
|
|
95
94
|
):
|
96
95
|
self._name = name
|
97
96
|
self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
|
98
|
-
self._logger = logger
|
99
97
|
if session:
|
100
98
|
self._session = session
|
101
99
|
else:
|
@@ -109,6 +107,7 @@ class HttpClient:
|
|
109
107
|
)
|
110
108
|
if isinstance(authenticator, AuthBase):
|
111
109
|
self._session.auth = authenticator
|
110
|
+
self._logger = logger
|
112
111
|
self._error_handler = error_handler or HttpStatusErrorHandler(self._logger)
|
113
112
|
if backoff_strategy is not None:
|
114
113
|
if isinstance(backoff_strategy, list):
|
@@ -142,12 +141,10 @@ class HttpClient:
|
|
142
141
|
if cache_dir:
|
143
142
|
sqlite_path = str(Path(cache_dir) / self.cache_filename)
|
144
143
|
else:
|
145
|
-
self._logger.info("Using memory for cache") # TODO: remove
|
146
144
|
sqlite_path = "file::memory:?cache=shared"
|
147
|
-
backend = SkipFailureSQLiteCache(self._name, sqlite_path) # TODO maybe add a busy timeout
|
148
145
|
return CachedLimiterSession(
|
149
|
-
sqlite_path, backend=
|
150
|
-
)
|
146
|
+
sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True
|
147
|
+
)
|
151
148
|
else:
|
152
149
|
return LimiterSession(api_budget=self._api_budget)
|
153
150
|
|
@@ -327,7 +324,7 @@ class HttpClient:
|
|
327
324
|
formatter = log_formatter
|
328
325
|
self._message_repository.log_message(
|
329
326
|
Level.DEBUG,
|
330
|
-
lambda: formatter(response),
|
327
|
+
lambda: formatter(response),
|
331
328
|
)
|
332
329
|
|
333
330
|
self._handle_error_resolution(
|
@@ -520,44 +517,3 @@ class HttpClient:
|
|
520
517
|
)
|
521
518
|
|
522
519
|
return request, response
|
523
|
-
|
524
|
-
|
525
|
-
class SkipFailureSQLiteDict(requests_cache.backends.sqlite.SQLiteDict):
|
526
|
-
def __getitem__(self, key): # type: ignore # lib is not typed
|
527
|
-
try:
|
528
|
-
return super().__getitem__(key) # type: ignore # lib is not typed
|
529
|
-
except Exception as exception:
|
530
|
-
if not isinstance(exception, KeyError):
|
531
|
-
logger.warning(f"Error while retrieving item from cache: {exception}")
|
532
|
-
else:
|
533
|
-
raise exception
|
534
|
-
|
535
|
-
def _write(self, key: str, value: str) -> None:
|
536
|
-
try:
|
537
|
-
super()._write(key, value) # type: ignore # lib is not typed
|
538
|
-
except Exception as exception:
|
539
|
-
logger.warning(f"Error while saving item to cache: {exception}")
|
540
|
-
|
541
|
-
|
542
|
-
class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
|
543
|
-
def __init__( # type: ignore # ignoring as lib is not typed
|
544
|
-
self,
|
545
|
-
table_name="response",
|
546
|
-
db_path="http_cache",
|
547
|
-
serializer=None,
|
548
|
-
**kwargs,
|
549
|
-
) -> None:
|
550
|
-
super().__init__(db_path, serializer, **kwargs)
|
551
|
-
skwargs = {"serializer": serializer, **kwargs} if serializer else kwargs
|
552
|
-
self.responses: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
|
553
|
-
db_path, table_name=table_name, fast_save=True, wal=True, **skwargs
|
554
|
-
)
|
555
|
-
self.redirects: requests_cache.backends.sqlite.SQLiteDict = SkipFailureSQLiteDict(
|
556
|
-
db_path,
|
557
|
-
table_name=f"redirects_{table_name}",
|
558
|
-
fast_save=True,
|
559
|
-
wal=True,
|
560
|
-
lock=self.responses._lock,
|
561
|
-
serializer=None,
|
562
|
-
**kwargs,
|
563
|
-
)
|
@@ -5,13 +5,14 @@
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from typing import Any, Mapping
|
7
7
|
|
8
|
+
import requests
|
8
9
|
from requests.auth import AuthBase
|
9
10
|
|
10
11
|
|
11
12
|
class AbstractHeaderAuthenticator(AuthBase):
|
12
13
|
"""Abstract class for an header-based authenticators that add a header to outgoing HTTP requests."""
|
13
14
|
|
14
|
-
def __call__(self, request):
|
15
|
+
def __call__(self, request: requests.PreparedRequest) -> Any:
|
15
16
|
"""Attach the HTTP headers required to authenticate on the HTTP request"""
|
16
17
|
request.headers.update(self.get_auth_header())
|
17
18
|
return request
|
@@ -30,12 +30,12 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
30
30
|
client_id: str,
|
31
31
|
client_secret: str,
|
32
32
|
refresh_token: str,
|
33
|
-
scopes: List[str] = None,
|
34
|
-
token_expiry_date: pendulum.DateTime = None,
|
35
|
-
token_expiry_date_format: str = None,
|
33
|
+
scopes: List[str] | None = None,
|
34
|
+
token_expiry_date: pendulum.DateTime | None = None,
|
35
|
+
token_expiry_date_format: str | None = None,
|
36
36
|
access_token_name: str = "access_token",
|
37
37
|
expires_in_name: str = "expires_in",
|
38
|
-
refresh_request_body: Mapping[str, Any] = None,
|
38
|
+
refresh_request_body: Mapping[str, Any] | None = None,
|
39
39
|
grant_type: str = "refresh_token",
|
40
40
|
token_expiry_is_time_of_expiration: bool = False,
|
41
41
|
refresh_token_error_status_codes: Tuple[int, ...] = (),
|
@@ -52,7 +52,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
52
52
|
self._refresh_request_body = refresh_request_body
|
53
53
|
self._grant_type = grant_type
|
54
54
|
|
55
|
-
self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1)
|
55
|
+
self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call]
|
56
56
|
self._token_expiry_date_format = token_expiry_date_format
|
57
57
|
self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration
|
58
58
|
self._access_token = None
|
@@ -75,14 +75,14 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
75
75
|
def get_access_token_name(self) -> str:
|
76
76
|
return self._access_token_name
|
77
77
|
|
78
|
-
def get_scopes(self) -> [str]:
|
79
|
-
return self._scopes
|
78
|
+
def get_scopes(self) -> list[str]:
|
79
|
+
return self._scopes # type: ignore [return-value]
|
80
80
|
|
81
81
|
def get_expires_in_name(self) -> str:
|
82
82
|
return self._expires_in_name
|
83
83
|
|
84
84
|
def get_refresh_request_body(self) -> Mapping[str, Any]:
|
85
|
-
return self._refresh_request_body
|
85
|
+
return self._refresh_request_body # type: ignore [return-value]
|
86
86
|
|
87
87
|
def get_grant_type(self) -> str:
|
88
88
|
return self._grant_type
|
@@ -90,7 +90,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
90
90
|
def get_token_expiry_date(self) -> pendulum.DateTime:
|
91
91
|
return self._token_expiry_date
|
92
92
|
|
93
|
-
def set_token_expiry_date(self, value: Union[str, int]):
|
93
|
+
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
94
94
|
self._token_expiry_date = self._parse_token_expiration_date(value)
|
95
95
|
|
96
96
|
@property
|
@@ -103,11 +103,11 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
103
103
|
|
104
104
|
@property
|
105
105
|
def access_token(self) -> str:
|
106
|
-
return self._access_token
|
106
|
+
return self._access_token # type: ignore [return-value]
|
107
107
|
|
108
108
|
@access_token.setter
|
109
|
-
def access_token(self, value: str):
|
110
|
-
self._access_token = value
|
109
|
+
def access_token(self, value: str) -> None:
|
110
|
+
self._access_token = value # type: ignore [assignment] # Incorrect type for assignment
|
111
111
|
|
112
112
|
|
113
113
|
class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
@@ -124,11 +124,11 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
124
124
|
self,
|
125
125
|
connector_config: Mapping[str, Any],
|
126
126
|
token_refresh_endpoint: str,
|
127
|
-
scopes: List[str] = None,
|
127
|
+
scopes: List[str] | None = None,
|
128
128
|
access_token_name: str = "access_token",
|
129
129
|
expires_in_name: str = "expires_in",
|
130
130
|
refresh_token_name: str = "refresh_token",
|
131
|
-
refresh_request_body: Mapping[str, Any] = None,
|
131
|
+
refresh_request_body: Mapping[str, Any] | None = None,
|
132
132
|
grant_type: str = "refresh_token",
|
133
133
|
client_id: Optional[str] = None,
|
134
134
|
client_secret: Optional[str] = None,
|
@@ -162,14 +162,17 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
162
162
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update
|
163
163
|
"""
|
164
164
|
self._client_id = (
|
165
|
-
client_id
|
165
|
+
client_id # type: ignore [assignment] # Incorrect type for assignment
|
166
166
|
if client_id is not None
|
167
|
-
else dpath.get(connector_config, ("credentials", "client_id"))
|
167
|
+
else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type]
|
168
168
|
)
|
169
169
|
self._client_secret = (
|
170
|
-
client_secret
|
170
|
+
client_secret # type: ignore [assignment] # Incorrect type for assignment
|
171
171
|
if client_secret is not None
|
172
|
-
else dpath.get(
|
172
|
+
else dpath.get(
|
173
|
+
connector_config, # type: ignore [arg-type]
|
174
|
+
("credentials", "client_secret"),
|
175
|
+
)
|
173
176
|
)
|
174
177
|
self._access_token_config_path = access_token_config_path
|
175
178
|
self._refresh_token_config_path = refresh_token_config_path
|
@@ -207,27 +210,50 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
207
210
|
|
208
211
|
@property
|
209
212
|
def access_token(self) -> str:
|
210
|
-
return dpath.get(
|
213
|
+
return dpath.get( # type: ignore [return-value]
|
214
|
+
self._connector_config, # type: ignore [arg-type]
|
215
|
+
self._access_token_config_path,
|
216
|
+
default="",
|
217
|
+
)
|
211
218
|
|
212
219
|
@access_token.setter
|
213
|
-
def access_token(self, new_access_token: str):
|
214
|
-
dpath.new(
|
220
|
+
def access_token(self, new_access_token: str) -> None:
|
221
|
+
dpath.new(
|
222
|
+
self._connector_config, # type: ignore [arg-type]
|
223
|
+
self._access_token_config_path,
|
224
|
+
new_access_token,
|
225
|
+
)
|
215
226
|
|
216
227
|
def get_refresh_token(self) -> str:
|
217
|
-
return dpath.get(
|
228
|
+
return dpath.get( # type: ignore [return-value]
|
229
|
+
self._connector_config, # type: ignore [arg-type]
|
230
|
+
self._refresh_token_config_path,
|
231
|
+
default="",
|
232
|
+
)
|
218
233
|
|
219
|
-
def set_refresh_token(self, new_refresh_token: str):
|
220
|
-
dpath.new(
|
234
|
+
def set_refresh_token(self, new_refresh_token: str) -> None:
|
235
|
+
dpath.new(
|
236
|
+
self._connector_config, # type: ignore [arg-type]
|
237
|
+
self._refresh_token_config_path,
|
238
|
+
new_refresh_token,
|
239
|
+
)
|
221
240
|
|
222
241
|
def get_token_expiry_date(self) -> pendulum.DateTime:
|
223
242
|
expiry_date = dpath.get(
|
224
|
-
self._connector_config,
|
243
|
+
self._connector_config, # type: ignore [arg-type]
|
244
|
+
self._token_expiry_date_config_path,
|
245
|
+
default="",
|
225
246
|
)
|
226
|
-
return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date)
|
247
|
+
return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call]
|
227
248
|
|
228
|
-
def set_token_expiry_date(
|
249
|
+
def set_token_expiry_date( # type: ignore[override]
|
250
|
+
self,
|
251
|
+
new_token_expiry_date: pendulum.DateTime,
|
252
|
+
) -> None:
|
229
253
|
dpath.new(
|
230
|
-
self._connector_config,
|
254
|
+
self._connector_config, # type: ignore [arg-type]
|
255
|
+
self._token_expiry_date_config_path,
|
256
|
+
str(new_token_expiry_date),
|
231
257
|
)
|
232
258
|
|
233
259
|
def token_has_expired(self) -> bool:
|
@@ -236,7 +262,8 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
236
262
|
|
237
263
|
@staticmethod
|
238
264
|
def get_new_token_expiry_date(
|
239
|
-
access_token_expires_in: str,
|
265
|
+
access_token_expires_in: str,
|
266
|
+
token_expiry_date_format: str | None = None,
|
240
267
|
) -> pendulum.DateTime:
|
241
268
|
if token_expiry_date_format:
|
242
269
|
return pendulum.from_format(access_token_expires_in, token_expiry_date_format)
|
@@ -253,7 +280,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
253
280
|
new_access_token, access_token_expires_in, new_refresh_token = (
|
254
281
|
self.refresh_access_token()
|
255
282
|
)
|
256
|
-
new_token_expiry_date = self.get_new_token_expiry_date(
|
283
|
+
new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date(
|
257
284
|
access_token_expires_in, self._token_expiry_date_format
|
258
285
|
)
|
259
286
|
self.access_token = new_access_token
|
@@ -264,13 +291,15 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
264
291
|
# message directly in the console, this is needed
|
265
292
|
if not isinstance(self._message_repository, NoopMessageRepository):
|
266
293
|
self._message_repository.emit_message(
|
267
|
-
create_connector_config_control_message(self._connector_config)
|
294
|
+
create_connector_config_control_message(self._connector_config) # type: ignore [arg-type]
|
268
295
|
)
|
269
296
|
else:
|
270
|
-
emit_configuration_as_airbyte_control_message(self._connector_config)
|
297
|
+
emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type]
|
271
298
|
return self.access_token
|
272
299
|
|
273
|
-
def refresh_access_token(
|
300
|
+
def refresh_access_token( # type: ignore[override] # Signature doesn't match base class
|
301
|
+
self,
|
302
|
+
) -> Tuple[str, str, str]:
|
274
303
|
response_json = self._get_refresh_access_token_response()
|
275
304
|
return (
|
276
305
|
response_json[self.get_access_token_name()],
|
@@ -35,7 +35,7 @@ def stream_data_to_airbyte_message(
|
|
35
35
|
# need it to normalize values against json schema. By default no action
|
36
36
|
# taken unless configured. See
|
37
37
|
# docs/connector-development/cdk-python/schemas.md for details.
|
38
|
-
transformer.transform(data, schema)
|
38
|
+
transformer.transform(data, schema)
|
39
39
|
if is_file_transfer_message:
|
40
40
|
message = AirbyteFileTransferRecordMessage(
|
41
41
|
stream=stream_name, file=data, emitted_at=now_millis, data={}
|
@@ -194,7 +194,7 @@ class InternalConfig(BaseModel):
|
|
194
194
|
def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
|
195
195
|
kwargs["by_alias"] = True
|
196
196
|
kwargs["exclude_unset"] = True
|
197
|
-
return super().dict(*args, **kwargs)
|
197
|
+
return super().dict(*args, **kwargs)
|
198
198
|
|
199
199
|
def is_limit_reached(self, records_counter: int) -> bool:
|
200
200
|
"""
|
@@ -5,9 +5,9 @@
|
|
5
5
|
import logging
|
6
6
|
from distutils.util import strtobool
|
7
7
|
from enum import Flag, auto
|
8
|
-
from typing import Any, Callable, Dict, Mapping, Optional
|
8
|
+
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
|
9
9
|
|
10
|
-
from jsonschema import Draft7Validator, ValidationError, validators
|
10
|
+
from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators
|
11
11
|
|
12
12
|
json_to_python_simple = {
|
13
13
|
"string": str,
|
@@ -30,7 +30,7 @@ class TransformConfig(Flag):
|
|
30
30
|
```
|
31
31
|
"""
|
32
32
|
|
33
|
-
# No action taken, default
|
33
|
+
# No action taken, default behavior. Cannot be combined with any other options.
|
34
34
|
NoTransform = auto()
|
35
35
|
# Applies default type casting with default_convert method which converts
|
36
36
|
# values by applying simple type casting to specified jsonschema type.
|
@@ -67,15 +67,15 @@ class TypeTransformer:
|
|
67
67
|
)
|
68
68
|
|
69
69
|
def registerCustomTransform(
|
70
|
-
self, normalization_callback: Callable[[Any,
|
71
|
-
) -> Callable:
|
70
|
+
self, normalization_callback: Callable[[Any, dict[str, Any]], Any]
|
71
|
+
) -> Callable[[Any, dict[str, Any]], Any]:
|
72
72
|
"""
|
73
73
|
Register custom normalization callback.
|
74
74
|
:param normalization_callback function to be used for value
|
75
75
|
normalization. Takes original value and part type schema. Should return
|
76
76
|
normalized value. See docs/connector-development/cdk-python/schemas.md
|
77
77
|
for details.
|
78
|
-
:return Same
|
78
|
+
:return Same callback, this is useful for using registerCustomTransform function as decorator.
|
79
79
|
"""
|
80
80
|
if TransformConfig.CustomSchemaNormalization not in self._config:
|
81
81
|
raise Exception(
|
@@ -141,7 +141,11 @@ class TypeTransformer:
|
|
141
141
|
return original_item
|
142
142
|
return original_item
|
143
143
|
|
144
|
-
def __get_normalizer(
|
144
|
+
def __get_normalizer(
|
145
|
+
self,
|
146
|
+
schema_key: str,
|
147
|
+
original_validator: Callable, # type: ignore[type-arg]
|
148
|
+
) -> Callable[[Any, Any, Any, dict[str, Any]], Generator[Any, Any, None]]:
|
145
149
|
"""
|
146
150
|
Traverse through object fields using native jsonschema validator and apply normalization function.
|
147
151
|
:param schema_key related json schema key that currently being validated/normalized.
|
@@ -149,8 +153,11 @@ class TypeTransformer:
|
|
149
153
|
"""
|
150
154
|
|
151
155
|
def normalizator(
|
152
|
-
validator_instance:
|
153
|
-
|
156
|
+
validator_instance: Validator,
|
157
|
+
property_value: Any,
|
158
|
+
instance: Any,
|
159
|
+
schema: Dict[str, Any],
|
160
|
+
) -> Generator[Any, Any, None]:
|
154
161
|
"""
|
155
162
|
Jsonschema validator callable it uses for validating instance. We
|
156
163
|
override default Draft7Validator to perform value transformation
|
@@ -163,10 +170,13 @@ class TypeTransformer:
|
|
163
170
|
:
|
164
171
|
"""
|
165
172
|
|
166
|
-
def resolve(subschema):
|
173
|
+
def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
|
167
174
|
if "$ref" in subschema:
|
168
|
-
_, resolved =
|
169
|
-
|
175
|
+
_, resolved = cast(
|
176
|
+
RefResolver,
|
177
|
+
validator_instance.resolver,
|
178
|
+
).resolve(subschema["$ref"])
|
179
|
+
return cast(dict[str, Any], resolved)
|
170
180
|
return subschema
|
171
181
|
|
172
182
|
# Transform object and array values before running json schema type checking for each element.
|
@@ -185,11 +195,20 @@ class TypeTransformer:
|
|
185
195
|
instance[index] = self.__normalize(item, subschema)
|
186
196
|
|
187
197
|
# Running native jsonschema traverse algorithm after field normalization is done.
|
188
|
-
yield from original_validator(
|
198
|
+
yield from original_validator(
|
199
|
+
validator_instance,
|
200
|
+
property_value,
|
201
|
+
instance,
|
202
|
+
schema,
|
203
|
+
)
|
189
204
|
|
190
205
|
return normalizator
|
191
206
|
|
192
|
-
def transform(
|
207
|
+
def transform(
|
208
|
+
self,
|
209
|
+
record: Dict[str, Any],
|
210
|
+
schema: Mapping[str, Any],
|
211
|
+
) -> None:
|
193
212
|
"""
|
194
213
|
Normalize and validate according to config.
|
195
214
|
:param record: record instance for normalization/transformation. All modification are done by modifying existent object.
|
@@ -201,7 +220,7 @@ class TypeTransformer:
|
|
201
220
|
for e in normalizer.iter_errors(record):
|
202
221
|
"""
|
203
222
|
just calling normalizer.validate() would throw an exception on
|
204
|
-
first validation
|
223
|
+
first validation occurrences and stop processing rest of schema.
|
205
224
|
"""
|
206
225
|
logger.warning(self.get_error_message(e))
|
207
226
|
|
@@ -23,7 +23,7 @@ from io import StringIO
|
|
23
23
|
from pathlib import Path
|
24
24
|
from typing import Any, List, Mapping, Optional, Union
|
25
25
|
|
26
|
-
|
26
|
+
import orjson
|
27
27
|
from pydantic import ValidationError as V2ValidationError
|
28
28
|
from serpyco_rs import SchemaValidationError
|
29
29
|
|
@@ -63,7 +63,7 @@ class EntrypointOutput:
|
|
63
63
|
@staticmethod
|
64
64
|
def _parse_message(message: str) -> AirbyteMessage:
|
65
65
|
try:
|
66
|
-
return AirbyteMessageSerializer.load(orjson.loads(message))
|
66
|
+
return AirbyteMessageSerializer.load(orjson.loads(message))
|
67
67
|
except (orjson.JSONDecodeError, SchemaValidationError):
|
68
68
|
# The platform assumes that logs that are not of AirbyteMessage format are log messages
|
69
69
|
return AirbyteMessage(
|
@@ -129,14 +129,19 @@ class EntrypointOutput:
|
|
129
129
|
return [
|
130
130
|
message
|
131
131
|
for message in self._get_message_by_types([Type.TRACE])
|
132
|
-
if message.trace.type == trace_type
|
133
|
-
]
|
132
|
+
if message.trace.type == trace_type # type: ignore[union-attr] # trace has `type`
|
133
|
+
]
|
134
134
|
|
135
135
|
def is_in_logs(self, pattern: str) -> bool:
|
136
136
|
"""Check if any log message case-insensitive matches the pattern."""
|
137
137
|
return any(
|
138
|
-
re.search(
|
139
|
-
|
138
|
+
re.search(
|
139
|
+
pattern,
|
140
|
+
entry.log.message, # type: ignore[union-attr] # log has `message`
|
141
|
+
flags=re.IGNORECASE,
|
142
|
+
)
|
143
|
+
for entry in self.logs
|
144
|
+
)
|
140
145
|
|
141
146
|
def is_not_in_logs(self, pattern: str) -> bool:
|
142
147
|
"""Check if no log message matches the case-insensitive pattern."""
|
@@ -183,7 +183,7 @@ class HttpResponseBuilder:
|
|
183
183
|
|
184
184
|
def _get_unit_test_folder(execution_folder: str) -> FilePath:
|
185
185
|
# FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code.
|
186
|
-
return get_unit_test_folder(execution_folder)
|
186
|
+
return get_unit_test_folder(execution_folder)
|
187
187
|
|
188
188
|
|
189
189
|
def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
|
@@ -47,7 +47,7 @@ def get_secrets(
|
|
47
47
|
result = []
|
48
48
|
for path in secret_paths:
|
49
49
|
try:
|
50
|
-
result.append(dpath.get(config, path))
|
50
|
+
result.append(dpath.get(config, path)) # type: ignore # dpath expect MutableMapping but doesn't need it
|
51
51
|
except KeyError:
|
52
52
|
# Since we try to get paths to all known secrets in the spec, in the case of oneOfs, some secret fields may not be present
|
53
53
|
# In that case, a KeyError is thrown. This is expected behavior.
|
@@ -7,7 +7,7 @@ import logging
|
|
7
7
|
import time
|
8
8
|
from contextlib import contextmanager
|
9
9
|
from dataclasses import dataclass, field
|
10
|
-
from typing import Optional
|
10
|
+
from typing import Any, Generator, Literal, Optional
|
11
11
|
|
12
12
|
logger = logging.getLogger("airbyte")
|
13
13
|
|
@@ -18,13 +18,13 @@ class EventTimer:
|
|
18
18
|
Event nesting follows a LIFO pattern, so finish will apply to the last started event.
|
19
19
|
"""
|
20
20
|
|
21
|
-
def __init__(self, name):
|
21
|
+
def __init__(self, name: str) -> None:
|
22
22
|
self.name = name
|
23
|
-
self.events = {}
|
23
|
+
self.events: dict[str, Any] = {}
|
24
24
|
self.count = 0
|
25
|
-
self.stack = []
|
25
|
+
self.stack: list[Any] = []
|
26
26
|
|
27
|
-
def start_event(self, name):
|
27
|
+
def start_event(self, name: str) -> None:
|
28
28
|
"""
|
29
29
|
Start a new event and push it to the stack.
|
30
30
|
"""
|
@@ -32,7 +32,7 @@ class EventTimer:
|
|
32
32
|
self.count += 1
|
33
33
|
self.stack.insert(0, self.events[name])
|
34
34
|
|
35
|
-
def finish_event(self):
|
35
|
+
def finish_event(self) -> None:
|
36
36
|
"""
|
37
37
|
Finish the current event and pop it from the stack.
|
38
38
|
"""
|
@@ -43,7 +43,7 @@ class EventTimer:
|
|
43
43
|
else:
|
44
44
|
logger.warning(f"{self.name} finish_event called without start_event")
|
45
45
|
|
46
|
-
def report(self, order_by="name"):
|
46
|
+
def report(self, order_by: Literal["name", "duration"] = "name") -> str:
|
47
47
|
"""
|
48
48
|
:param order_by: 'name' or 'duration'
|
49
49
|
"""
|
@@ -69,15 +69,15 @@ class Event:
|
|
69
69
|
return (self.end - self.start) / 1e9
|
70
70
|
return float("+inf")
|
71
71
|
|
72
|
-
def __str__(self):
|
72
|
+
def __str__(self) -> str:
|
73
73
|
return f"{self.name} {datetime.timedelta(seconds=self.duration)}"
|
74
74
|
|
75
|
-
def finish(self):
|
75
|
+
def finish(self) -> None:
|
76
76
|
self.end = time.perf_counter_ns()
|
77
77
|
|
78
78
|
|
79
79
|
@contextmanager
|
80
|
-
def create_timer(name):
|
80
|
+
def create_timer(name: str) -> Generator[EventTimer, Any, None]:
|
81
81
|
"""
|
82
82
|
Creates a new EventTimer as a context manager to improve code readability.
|
83
83
|
"""
|
@@ -8,15 +8,16 @@ def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor:
|
|
8
8
|
match message.type:
|
9
9
|
case Type.RECORD:
|
10
10
|
return HashableStreamDescriptor(
|
11
|
-
name=message.record.stream,
|
12
|
-
|
11
|
+
name=message.record.stream, # type: ignore[union-attr] # record has `stream`
|
12
|
+
namespace=message.record.namespace, # type: ignore[union-attr] # record has `namespace`
|
13
|
+
)
|
13
14
|
case Type.STATE:
|
14
15
|
if not message.state.stream or not message.state.stream.stream_descriptor: # type: ignore[union-attr] # state has `stream`
|
15
16
|
raise ValueError(
|
16
17
|
"State message was not in per-stream state format, which is required for record counts."
|
17
18
|
)
|
18
19
|
return HashableStreamDescriptor(
|
19
|
-
name=message.state.stream.stream_descriptor.name,
|
20
|
+
name=message.state.stream.stream_descriptor.name, # type: ignore[union-attr] # state has `stream`
|
20
21
|
namespace=message.state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # state has `stream`
|
21
22
|
)
|
22
23
|
case _:
|
@@ -4,11 +4,12 @@
|
|
4
4
|
|
5
5
|
import json
|
6
6
|
import re
|
7
|
+
from typing import Any
|
7
8
|
|
8
9
|
from jsonschema import RefResolver
|
9
10
|
|
10
11
|
|
11
|
-
def resolve_refs(schema: dict) -> dict:
|
12
|
+
def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]:
|
12
13
|
"""
|
13
14
|
For spec schemas generated using Pydantic models, the resulting JSON schema can contain refs between object
|
14
15
|
relationships.
|
@@ -20,6 +21,6 @@ def resolve_refs(schema: dict) -> dict:
|
|
20
21
|
str_schema = str_schema.replace(
|
21
22
|
ref_block, json.dumps(json_schema_ref_resolver.resolve(ref)[1])
|
22
23
|
)
|
23
|
-
pyschema: dict = json.loads(str_schema)
|
24
|
+
pyschema: dict[str, Any] = json.loads(str_schema)
|
24
25
|
del pyschema["definitions"]
|
25
26
|
return pyschema
|
@@ -3,9 +3,9 @@
|
|
3
3
|
#
|
4
4
|
import time
|
5
5
|
import traceback
|
6
|
-
from typing import Optional
|
6
|
+
from typing import Any, Optional
|
7
7
|
|
8
|
-
|
8
|
+
import orjson
|
9
9
|
|
10
10
|
from airbyte_cdk.models import (
|
11
11
|
AirbyteConnectionStatus,
|
@@ -104,9 +104,9 @@ class AirbyteTracedException(Exception):
|
|
104
104
|
cls,
|
105
105
|
exc: BaseException,
|
106
106
|
stream_descriptor: Optional[StreamDescriptor] = None,
|
107
|
-
*args,
|
108
|
-
**kwargs,
|
109
|
-
) -> "AirbyteTracedException":
|
107
|
+
*args: Any,
|
108
|
+
**kwargs: Any,
|
109
|
+
) -> "AirbyteTracedException":
|
110
110
|
"""
|
111
111
|
Helper to create an AirbyteTracedException from an existing exception
|
112
112
|
:param exc: the exception that caused the error
|
@@ -131,13 +131,15 @@ class AirbyteTracedException(Exception):
|
|
131
131
|
"""
|
132
132
|
error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
|
133
133
|
if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
134
|
-
error_message.trace.error.message = filter_secrets(
|
134
|
+
error_message.trace.error.message = filter_secrets( # type: ignore[union-attr]
|
135
|
+
error_message.trace.error.message, # type: ignore[union-attr]
|
136
|
+
)
|
135
137
|
if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
136
|
-
error_message.trace.error.internal_message = filter_secrets(
|
137
|
-
error_message.trace.error.internal_message
|
138
|
-
)
|
138
|
+
error_message.trace.error.internal_message = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
139
|
+
error_message.trace.error.internal_message # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
140
|
+
)
|
139
141
|
if error_message.trace.error.stack_trace: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
140
|
-
error_message.trace.error.stack_trace = filter_secrets(
|
141
|
-
error_message.trace.error.stack_trace
|
142
|
-
)
|
142
|
+
error_message.trace.error.stack_trace = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
143
|
+
error_message.trace.error.stack_trace # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage
|
144
|
+
)
|
143
145
|
return error_message
|