airbyte-cdk 6.7.1rc4__py3-none-any.whl → 6.7.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +2 -1
- airbyte_cdk/config_observation.py +2 -1
- airbyte_cdk/connector.py +1 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +2 -1
- airbyte_cdk/destinations/destination.py +2 -1
- airbyte_cdk/destinations/vector_db_based/config.py +2 -1
- airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
- airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
- airbyte_cdk/entrypoint.py +3 -2
- airbyte_cdk/logger.py +2 -1
- airbyte_cdk/models/__init__.py +2 -0
- airbyte_cdk/models/airbyte_protocol.py +2 -1
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +2 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
- airbyte_cdk/sources/declarative/auth/token.py +2 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +66 -8
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +196 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
- airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +7 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +144 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +45 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -7
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
- airbyte_cdk/sources/embedded/tools.py +1 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
- airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
- airbyte_cdk/sources/file_based/file_based_source.py +2 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +1 -0
- airbyte_cdk/sources/streams/call_rate.py +1 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
- airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +52 -9
- airbyte_cdk/sources/streams/concurrent/default_stream.py +1 -0
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
- airbyte_cdk/sources/streams/http/http.py +3 -2
- airbyte_cdk/sources/streams/http/http_client.py +58 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
- airbyte_cdk/sources/types.py +14 -1
- airbyte_cdk/sources/utils/schema_helpers.py +3 -2
- airbyte_cdk/sql/secrets.py +2 -1
- airbyte_cdk/sql/shared/sql_processor.py +8 -6
- airbyte_cdk/test/entrypoint_wrapper.py +4 -3
- airbyte_cdk/test/mock_http/mocker.py +1 -0
- airbyte_cdk/utils/schema_inferrer.py +2 -1
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +2 -1
- {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/METADATA +9 -2
- {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/RECORD +123 -124
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
- {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -10,6 +10,8 @@ from dataclasses import dataclass
|
|
10
10
|
from functools import cached_property, lru_cache
|
11
11
|
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
12
|
|
13
|
+
from deprecated import deprecated
|
14
|
+
|
13
15
|
import airbyte_cdk.sources.utils.casing as casing
|
14
16
|
from airbyte_cdk.models import (
|
15
17
|
AirbyteMessage,
|
@@ -35,7 +37,6 @@ from airbyte_cdk.sources.types import StreamSlice
|
|
35
37
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
|
36
38
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
37
39
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
38
|
-
from deprecated import deprecated
|
39
40
|
|
40
41
|
# A stream's read method can return one of the following types:
|
41
42
|
# Mapping[str, Any]: The content of an AirbyteRecordMessage
|
@@ -4,12 +4,13 @@
|
|
4
4
|
|
5
5
|
from typing import Mapping, Type, Union
|
6
6
|
|
7
|
+
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
8
|
+
|
7
9
|
from airbyte_cdk.models import FailureType
|
8
10
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
9
11
|
ErrorResolution,
|
10
12
|
ResponseAction,
|
11
13
|
)
|
12
|
-
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
13
14
|
|
14
15
|
DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = {
|
15
16
|
InvalidSchema: ErrorResolution(
|
@@ -5,9 +5,10 @@ from enum import Enum
|
|
5
5
|
from typing import Optional, Union
|
6
6
|
|
7
7
|
import requests
|
8
|
+
from requests import HTTPError
|
9
|
+
|
8
10
|
from airbyte_cdk.models import FailureType
|
9
11
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
10
|
-
from requests import HTTPError
|
11
12
|
|
12
13
|
|
13
14
|
class ResponseAction(Enum):
|
@@ -9,6 +9,9 @@ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optio
|
|
9
9
|
from urllib.parse import urljoin
|
10
10
|
|
11
11
|
import requests
|
12
|
+
from deprecated import deprecated
|
13
|
+
from requests.auth import AuthBase
|
14
|
+
|
12
15
|
from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode
|
13
16
|
from airbyte_cdk.models import Type as MessageType
|
14
17
|
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
|
@@ -33,8 +36,6 @@ from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
|
33
36
|
from airbyte_cdk.sources.streams.http.http_client import HttpClient
|
34
37
|
from airbyte_cdk.sources.types import Record, StreamSlice
|
35
38
|
from airbyte_cdk.sources.utils.types import JsonType
|
36
|
-
from deprecated import deprecated
|
37
|
-
from requests.auth import AuthBase
|
38
39
|
|
39
40
|
# list of all possible HTTP methods which can be used for sending of request bodies
|
40
41
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
@@ -6,11 +6,13 @@ import logging
|
|
6
6
|
import os
|
7
7
|
import urllib
|
8
8
|
from pathlib import Path
|
9
|
-
from typing import Any, Callable, List, Mapping, Optional, Tuple, Union
|
9
|
+
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
|
10
10
|
|
11
11
|
import orjson
|
12
12
|
import requests
|
13
13
|
import requests_cache
|
14
|
+
from requests.auth import AuthBase
|
15
|
+
|
14
16
|
from airbyte_cdk.models import (
|
15
17
|
AirbyteMessageSerializer,
|
16
18
|
AirbyteStreamStatus,
|
@@ -43,13 +45,13 @@ from airbyte_cdk.sources.streams.http.rate_limiting import (
|
|
43
45
|
rate_limit_default_backoff_handler,
|
44
46
|
user_defined_backoff_handler,
|
45
47
|
)
|
48
|
+
from airbyte_cdk.sources.utils.types import JsonType
|
46
49
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
47
50
|
from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
|
48
51
|
from airbyte_cdk.utils.stream_status_utils import (
|
49
52
|
as_airbyte_message as stream_status_as_airbyte_message,
|
50
53
|
)
|
51
54
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
52
|
-
from requests.auth import AuthBase
|
53
55
|
|
54
56
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
55
57
|
|
@@ -74,6 +76,7 @@ class MessageRepresentationAirbyteTracedErrors(AirbyteTracedException):
|
|
74
76
|
class HttpClient:
|
75
77
|
_DEFAULT_MAX_RETRY: int = 5
|
76
78
|
_DEFAULT_MAX_TIME: int = 60 * 10
|
79
|
+
_ACTIONS_TO_RETRY_ON = {ResponseAction.RETRY, ResponseAction.RATE_LIMITED}
|
77
80
|
|
78
81
|
def __init__(
|
79
82
|
self,
|
@@ -91,11 +94,9 @@ class HttpClient:
|
|
91
94
|
):
|
92
95
|
self._name = name
|
93
96
|
self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
|
94
|
-
self._is_session_owner = False
|
95
97
|
if session:
|
96
98
|
self._session = session
|
97
99
|
else:
|
98
|
-
self._is_session_owner = True
|
99
100
|
self._use_cache = use_cache
|
100
101
|
self._session = self._request_session()
|
101
102
|
self._session.mount(
|
@@ -116,13 +117,10 @@ class HttpClient:
|
|
116
117
|
else:
|
117
118
|
self._backoff_strategies = [DefaultBackoffStrategy()]
|
118
119
|
self._error_message_parser = error_message_parser or JsonErrorMessageParser()
|
120
|
+
self._request_attempt_count: Dict[requests.PreparedRequest, int] = {}
|
119
121
|
self._disable_retries = disable_retries
|
120
122
|
self._message_repository = message_repository
|
121
123
|
|
122
|
-
def __del__(self):
|
123
|
-
if self._is_session_owner:
|
124
|
-
self._session.close()
|
125
|
-
|
126
124
|
@property
|
127
125
|
def cache_filename(self) -> str:
|
128
126
|
"""
|
@@ -275,6 +273,13 @@ class HttpClient:
|
|
275
273
|
log_formatter: Optional[Callable[[requests.Response], Any]] = None,
|
276
274
|
exit_on_rate_limit: Optional[bool] = False,
|
277
275
|
) -> requests.Response:
|
276
|
+
if request not in self._request_attempt_count:
|
277
|
+
self._request_attempt_count[request] = 1
|
278
|
+
else:
|
279
|
+
self._request_attempt_count[request] += 1
|
280
|
+
if hasattr(self._session, "auth") and isinstance(self._session.auth, AuthBase):
|
281
|
+
self._session.auth(request)
|
282
|
+
|
278
283
|
self._logger.debug(
|
279
284
|
"Making outbound API request",
|
280
285
|
extra={"headers": request.headers, "url": request.url, "request_body": request.body},
|
@@ -332,6 +337,40 @@ class HttpClient:
|
|
332
337
|
|
333
338
|
return response # type: ignore # will either return a valid response of type requests.Response or raise an exception
|
334
339
|
|
340
|
+
def _get_response_body(self, response: requests.Response) -> Optional[JsonType]:
|
341
|
+
"""
|
342
|
+
Extracts and returns the body of an HTTP response.
|
343
|
+
|
344
|
+
This method attempts to parse the response body as JSON. If the response
|
345
|
+
body is not valid JSON, it falls back to decoding the response content
|
346
|
+
as a UTF-8 string. If both attempts fail, it returns None.
|
347
|
+
|
348
|
+
Args:
|
349
|
+
response (requests.Response): The HTTP response object.
|
350
|
+
|
351
|
+
Returns:
|
352
|
+
Optional[JsonType]: The parsed JSON object as a string, the decoded
|
353
|
+
response content as a string, or None if both parsing attempts fail.
|
354
|
+
"""
|
355
|
+
try:
|
356
|
+
return str(response.json())
|
357
|
+
except requests.exceptions.JSONDecodeError:
|
358
|
+
try:
|
359
|
+
return response.content.decode("utf-8")
|
360
|
+
except Exception:
|
361
|
+
return "The Content of the Response couldn't be decoded."
|
362
|
+
|
363
|
+
def _evict_key(self, prepared_request: requests.PreparedRequest) -> None:
|
364
|
+
"""
|
365
|
+
Addresses high memory consumption when enabling concurrency in https://github.com/airbytehq/oncall/issues/6821.
|
366
|
+
|
367
|
+
The `_request_attempt_count` attribute keeps growing as multiple requests are made using the same `http_client`.
|
368
|
+
To mitigate this issue, we evict keys for completed requests once we confirm that no further retries are needed.
|
369
|
+
This helps manage memory usage more efficiently while maintaining the necessary logic for retry attempts.
|
370
|
+
"""
|
371
|
+
if prepared_request in self._request_attempt_count:
|
372
|
+
del self._request_attempt_count[prepared_request]
|
373
|
+
|
335
374
|
def _handle_error_resolution(
|
336
375
|
self,
|
337
376
|
response: Optional[requests.Response],
|
@@ -340,6 +379,9 @@ class HttpClient:
|
|
340
379
|
error_resolution: ErrorResolution,
|
341
380
|
exit_on_rate_limit: Optional[bool] = False,
|
342
381
|
) -> None:
|
382
|
+
if error_resolution.response_action not in self._ACTIONS_TO_RETRY_ON:
|
383
|
+
self._evict_key(request)
|
384
|
+
|
343
385
|
# Emit stream status RUNNING with the reason RATE_LIMITED to log that the rate limit has been reached
|
344
386
|
if error_resolution.response_action == ResponseAction.RATE_LIMITED:
|
345
387
|
# TODO: Update to handle with message repository when concurrent message repository is ready
|
@@ -360,13 +402,18 @@ class HttpClient:
|
|
360
402
|
|
361
403
|
if error_resolution.response_action == ResponseAction.FAIL:
|
362
404
|
if response is not None:
|
363
|
-
|
405
|
+
filtered_response_message = filter_secrets(
|
406
|
+
f"Request (body): '{str(request.body)}'. Response (body): '{self._get_response_body(response)}'. Response (headers): '{response.headers}'."
|
407
|
+
)
|
408
|
+
error_message = f"'{request.method}' request to '{request.url}' failed with status code '{response.status_code}' and error message: '{self._error_message_parser.parse_response_error_message(response)}'. {filtered_response_message}"
|
364
409
|
else:
|
365
410
|
error_message = (
|
366
411
|
f"'{request.method}' request to '{request.url}' failed with exception: '{exc}'"
|
367
412
|
)
|
368
413
|
|
369
|
-
|
414
|
+
# ensure the exception message is emitted before raised
|
415
|
+
self._logger.error(error_message)
|
416
|
+
|
370
417
|
raise MessageRepresentationAirbyteTracedErrors(
|
371
418
|
internal_message=error_message,
|
372
419
|
message=error_resolution.error_message or error_message,
|
@@ -390,7 +437,7 @@ class HttpClient:
|
|
390
437
|
for backoff_strategy in self._backoff_strategies:
|
391
438
|
backoff_time = backoff_strategy.backoff_time(
|
392
439
|
response_or_exception=response if response is not None else exc,
|
393
|
-
attempt_count=
|
440
|
+
attempt_count=self._request_attempt_count[request],
|
394
441
|
)
|
395
442
|
if backoff_time:
|
396
443
|
user_defined_backoff_time = backoff_time
|
@@ -10,12 +10,13 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
|
|
10
10
|
import backoff
|
11
11
|
import pendulum
|
12
12
|
import requests
|
13
|
+
from requests.auth import AuthBase
|
14
|
+
|
13
15
|
from airbyte_cdk.models import FailureType, Level
|
14
16
|
from airbyte_cdk.sources.http_logger import format_http_message
|
15
17
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
16
18
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
19
|
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
|
18
|
-
from requests.auth import AuthBase
|
19
20
|
|
20
21
|
from ..exceptions import DefaultBackoffException
|
21
22
|
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,6 +6,8 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
+
import orjson
|
10
|
+
|
9
11
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
10
12
|
# "hello"}] returns "hello"
|
11
13
|
FieldPointer = List[str]
|
@@ -15,9 +17,17 @@ StreamState = Mapping[str, Any]
|
|
15
17
|
|
16
18
|
|
17
19
|
class Record(Mapping[str, Any]):
|
18
|
-
def __init__(
|
20
|
+
def __init__(
|
21
|
+
self,
|
22
|
+
data: Mapping[str, Any],
|
23
|
+
stream_name: str,
|
24
|
+
associated_slice: Optional[StreamSlice] = None,
|
25
|
+
is_file_transfer_message: bool = False,
|
26
|
+
):
|
19
27
|
self._data = data
|
20
28
|
self._associated_slice = associated_slice
|
29
|
+
self.stream_name = stream_name
|
30
|
+
self.is_file_transfer_message = is_file_transfer_message
|
21
31
|
|
22
32
|
@property
|
23
33
|
def data(self) -> Mapping[str, Any]:
|
@@ -139,3 +149,6 @@ class StreamSlice(Mapping[str, Any]):
|
|
139
149
|
|
140
150
|
def __json_serializable__(self) -> Any:
|
141
151
|
return self._stream_slice
|
152
|
+
|
153
|
+
def __hash__(self) -> int:
|
154
|
+
return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
|
@@ -10,12 +10,13 @@ import pkgutil
|
|
10
10
|
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
|
11
11
|
|
12
12
|
import jsonref
|
13
|
-
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
14
|
-
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
15
13
|
from jsonschema import RefResolver, validate
|
16
14
|
from jsonschema.exceptions import ValidationError
|
17
15
|
from pydantic.v1 import BaseModel, Field
|
18
16
|
|
17
|
+
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
18
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
19
|
+
|
19
20
|
|
20
21
|
class JsonFileLoader:
|
21
22
|
"""
|
airbyte_cdk/sql/secrets.py
CHANGED
@@ -6,9 +6,10 @@ from __future__ import annotations
|
|
6
6
|
import json
|
7
7
|
from typing import TYPE_CHECKING, Any
|
8
8
|
|
9
|
-
from airbyte_cdk.sql import exceptions as exc
|
10
9
|
from pydantic_core import CoreSchema, core_schema
|
11
10
|
|
11
|
+
from airbyte_cdk.sql import exceptions as exc
|
12
|
+
|
12
13
|
if TYPE_CHECKING:
|
13
14
|
from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, ValidationInfo
|
14
15
|
from pydantic.json_schema import JsonSchemaValue
|
@@ -13,6 +13,12 @@ from typing import TYPE_CHECKING, Any, final
|
|
13
13
|
import pandas as pd
|
14
14
|
import sqlalchemy
|
15
15
|
import ulid
|
16
|
+
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
17
|
+
from pandas import Index
|
18
|
+
from pydantic import BaseModel, Field
|
19
|
+
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
20
|
+
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
|
21
|
+
|
16
22
|
from airbyte_cdk.sql import exceptions as exc
|
17
23
|
from airbyte_cdk.sql._util.hashing import one_way_hash
|
18
24
|
from airbyte_cdk.sql._util.name_normalizers import LowerCaseNormalizer
|
@@ -24,16 +30,10 @@ from airbyte_cdk.sql.constants import (
|
|
24
30
|
)
|
25
31
|
from airbyte_cdk.sql.secrets import SecretString
|
26
32
|
from airbyte_cdk.sql.types import SQLTypeConverter
|
27
|
-
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
28
|
-
from pandas import Index
|
29
|
-
from pydantic import BaseModel, Field
|
30
|
-
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
31
|
-
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
|
32
33
|
|
33
34
|
if TYPE_CHECKING:
|
34
35
|
from collections.abc import Generator
|
35
36
|
|
36
|
-
from airbyte_cdk.sql.shared.catalog_providers import CatalogProvider
|
37
37
|
from sqlalchemy.engine import Connection, Engine
|
38
38
|
from sqlalchemy.engine.cursor import CursorResult
|
39
39
|
from sqlalchemy.engine.reflection import Inspector
|
@@ -41,6 +41,8 @@ if TYPE_CHECKING:
|
|
41
41
|
from sqlalchemy.sql.elements import TextClause
|
42
42
|
from sqlalchemy.sql.type_api import TypeEngine
|
43
43
|
|
44
|
+
from airbyte_cdk.sql.shared.catalog_providers import CatalogProvider
|
45
|
+
|
44
46
|
|
45
47
|
class SQLRuntimeError(Exception):
|
46
48
|
"""Raised when an SQL operation fails."""
|
@@ -23,6 +23,10 @@ from io import StringIO
|
|
23
23
|
from pathlib import Path
|
24
24
|
from typing import Any, List, Mapping, Optional, Union
|
25
25
|
|
26
|
+
from orjson import orjson
|
27
|
+
from pydantic import ValidationError as V2ValidationError
|
28
|
+
from serpyco_rs import SchemaValidationError
|
29
|
+
|
26
30
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
27
31
|
from airbyte_cdk.exception_handler import assemble_uncaught_exception
|
28
32
|
from airbyte_cdk.logger import AirbyteLogFormatter
|
@@ -40,9 +44,6 @@ from airbyte_cdk.models import (
|
|
40
44
|
Type,
|
41
45
|
)
|
42
46
|
from airbyte_cdk.sources import Source
|
43
|
-
from orjson import orjson
|
44
|
-
from pydantic import ValidationError as V2ValidationError
|
45
|
-
from serpyco_rs import SchemaValidationError
|
46
47
|
|
47
48
|
|
48
49
|
class EntrypointOutput:
|
@@ -5,11 +5,12 @@
|
|
5
5
|
from collections import defaultdict
|
6
6
|
from typing import Any, Dict, List, Mapping, Optional
|
7
7
|
|
8
|
-
from airbyte_cdk.models import AirbyteRecordMessage
|
9
8
|
from genson import SchemaBuilder, SchemaNode
|
10
9
|
from genson.schema.strategies.object import Object
|
11
10
|
from genson.schema.strategies.scalar import Number
|
12
11
|
|
12
|
+
from airbyte_cdk.models import AirbyteRecordMessage
|
13
|
+
|
13
14
|
# schema keywords
|
14
15
|
_TYPE = "type"
|
15
16
|
_NULL_TYPE = "null"
|
@@ -5,6 +5,8 @@ import time
|
|
5
5
|
import traceback
|
6
6
|
from typing import Optional
|
7
7
|
|
8
|
+
from orjson import orjson
|
9
|
+
|
8
10
|
from airbyte_cdk.models import (
|
9
11
|
AirbyteConnectionStatus,
|
10
12
|
AirbyteErrorTraceMessage,
|
@@ -18,7 +20,6 @@ from airbyte_cdk.models import (
|
|
18
20
|
)
|
19
21
|
from airbyte_cdk.models import Type as MessageType
|
20
22
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
21
|
-
from orjson import orjson
|
22
23
|
|
23
24
|
|
24
25
|
class AirbyteTracedException(Exception):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.7.1rc4
|
3
|
+
Version: 6.7.2.dev0
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -25,7 +25,7 @@ Requires-Dist: Deprecated (>=1.2,<1.3)
|
|
25
25
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
27
|
Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
|
28
|
-
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.
|
28
|
+
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
29
29
|
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
30
30
|
Requires-Dist: backoff
|
31
31
|
Requires-Dist: cachetools
|
@@ -170,6 +170,13 @@ Installing all extras is required to run the full suite of unit tests.
|
|
170
170
|
|
171
171
|
To see all available scripts, run `poetry run poe`.
|
172
172
|
|
173
|
+
#### Formatting the code
|
174
|
+
|
175
|
+
- Iterate on the CDK code locally
|
176
|
+
- Run `poetry run ruff format` to format your changes.
|
177
|
+
|
178
|
+
To see all available `ruff` options, run `poetry run ruff`.
|
179
|
+
|
173
180
|
##### Autogenerated files
|
174
181
|
|
175
182
|
Low-code CDK models are generated from `sources/declarative/declarative_component_schema.yaml`. If
|