airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
airbyte_cdk/sources/connector_state_manager.py

```diff
@@ -6,7 +6,14 @@ import copy
 from dataclasses import dataclass
 from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
 
-from airbyte_cdk.models import AirbyteMessage, AirbyteStateBlob, AirbyteStateMessage, AirbyteStateType, AirbyteStreamState, StreamDescriptor
+from airbyte_cdk.models import (
+    AirbyteMessage,
+    AirbyteStateBlob,
+    AirbyteStateMessage,
+    AirbyteStateType,
+    AirbyteStreamState,
+    StreamDescriptor,
+)
 from airbyte_cdk.models import Type as MessageType
 
 
@@ -42,19 +49,25 @@ class ConnectorStateManager:
         )
         self.per_stream_states = per_stream_states
 
-    def get_stream_state(self, stream_name: str, namespace: Optional[str]) -> MutableMapping[str, Any]:
+    def get_stream_state(
+        self, stream_name: str, namespace: Optional[str]
+    ) -> MutableMapping[str, Any]:
         """
         Retrieves the state of a given stream based on its descriptor (name + namespace).
         :param stream_name: Name of the stream being fetched
         :param namespace: Namespace of the stream being fetched
         :return: The per-stream state for a stream
         """
-        stream_state: AirbyteStateBlob | None = self.per_stream_states.get(HashableStreamDescriptor(name=stream_name, namespace=namespace))
+        stream_state: AirbyteStateBlob | None = self.per_stream_states.get(
+            HashableStreamDescriptor(name=stream_name, namespace=namespace)
+        )
         if stream_state:
             return copy.deepcopy({k: v for k, v in stream_state.__dict__.items()})
         return {}
 
-    def update_state_for_stream(self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any]) -> None:
+    def update_state_for_stream(
+        self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any]
+    ) -> None:
         """
         Overwrites the state blob of a specific stream based on the provided stream name and optional namespace
         :param stream_name: The name of the stream whose state is being updated
@@ -79,7 +92,8 @@ class ConnectorStateManager:
             state=AirbyteStateMessage(
                 type=AirbyteStateType.STREAM,
                 stream=AirbyteStreamState(
-                    stream_descriptor=StreamDescriptor(name=stream_name, namespace=namespace), stream_state=stream_state
+                    stream_descriptor=StreamDescriptor(name=stream_name, namespace=namespace),
+                    stream_state=stream_state,
                 ),
             ),
         )
@@ -88,7 +102,10 @@ class ConnectorStateManager:
     def _extract_from_state_message(
         cls,
         state: Optional[List[AirbyteStateMessage]],
-    ) -> Tuple[Optional[AirbyteStateBlob], MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]]]:
+    ) -> Tuple[
+        Optional[AirbyteStateBlob],
+        MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]],
+    ]:
         """
         Takes an incoming list of state messages or a global state message and extracts state attributes according to
         type which can then be assigned to the new state manager being instantiated
@@ -105,7 +122,8 @@ class ConnectorStateManager:
            shared_state = copy.deepcopy(global_state.shared_state, {})  # type: ignore[union-attr] # global_state has shared_state
            streams = {
                HashableStreamDescriptor(
-                   name=per_stream_state.stream_descriptor.name, namespace=per_stream_state.stream_descriptor.namespace
+                   name=per_stream_state.stream_descriptor.name,
+                   namespace=per_stream_state.stream_descriptor.namespace,
                ): per_stream_state.stream_state
                for per_stream_state in global_state.stream_states  # type: ignore[union-attr] # global_state has shared_state
            }
@@ -113,10 +131,12 @@ class ConnectorStateManager:
        else:
            streams = {
                HashableStreamDescriptor(
-                   name=per_stream_state.stream.stream_descriptor.name, namespace=per_stream_state.stream.stream_descriptor.namespace  # type: ignore[union-attr] # stream has stream_descriptor
+                   name=per_stream_state.stream.stream_descriptor.name,
+                   namespace=per_stream_state.stream.stream_descriptor.namespace,  # type: ignore[union-attr] # stream has stream_descriptor
                ): per_stream_state.stream.stream_state  # type: ignore[union-attr] # stream has stream_state
                for per_stream_state in state
-               if per_stream_state.type == AirbyteStateType.STREAM and hasattr(per_stream_state, "stream")  # type: ignore # state is always a list of AirbyteStateMessage if is_per_stream is True
+               if per_stream_state.type == AirbyteStateType.STREAM
+               and hasattr(per_stream_state, "stream")  # type: ignore # state is always a list of AirbyteStateMessage if is_per_stream is True
            }
            return None, streams
 
@@ -130,5 +150,7 @@ class ConnectorStateManager:
         )
 
     @staticmethod
-    def _is_per_stream_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool:
+    def _is_per_stream_state(
+        state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]],
+    ) -> bool:
        return isinstance(state, List)
```
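Every hunk above is line-wrapping only, with no behavior change. For orientation, a minimal sketch of the API these signatures belong to; the `state=` constructor keyword and the `AirbyteStateBlob` construction are assumptions inferred from `_extract_from_state_message`, not shown in this diff:

```python
# Hedged sketch: exercises get_stream_state / update_state_for_stream as
# re-wrapped above. The state payload and constructor keyword are assumptions.
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager

state = [
    AirbyteStateMessage(
        type=AirbyteStateType.STREAM,
        stream=AirbyteStreamState(
            stream_descriptor=StreamDescriptor(name="users", namespace=None),
            stream_state=AirbyteStateBlob({"updated_at": "2024-01-01T00:00:00Z"}),
        ),
    )
]

manager = ConnectorStateManager(state=state)
manager.get_stream_state("users", None)  # deep copy of the blob as a dict
manager.update_state_for_stream("users", None, {"updated_at": "2024-02-01T00:00:00Z"})
```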
airbyte_cdk/sources/declarative/async_job/job.py

```diff
@@ -18,7 +18,9 @@ class AsyncJob:
     it and call `ApiJob.update_status`, `ApiJob.status` will not reflect the actual API side status.
     """
 
-    def __init__(self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None) -> None:
+    def __init__(
+        self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None
+    ) -> None:
         self._api_job_id = api_job_id
         self._job_parameters = job_parameters
         self._status = AsyncJobStatus.RUNNING
```
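A small hedged sketch of the `AsyncJob` lifecycle implied by the constructor and docstring above; the empty `StreamSlice` is a placeholder:

```python
# Hedged sketch; StreamSlice(partition={}, cursor_slice={}) is a placeholder value.
from datetime import timedelta

from airbyte_cdk import StreamSlice
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus

job = AsyncJob(
    api_job_id="job_123",
    job_parameters=StreamSlice(partition={}, cursor_slice={}),
    timeout=timedelta(minutes=30),
)
assert job.status() == AsyncJobStatus.RUNNING  # initial status, set in __init__ above
job.update_status(AsyncJobStatus.COMPLETED)    # caller refreshes it, per the docstring
```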
airbyte_cdk/sources/declarative/async_job/job_orchestrator.py

```diff
@@ -6,13 +6,28 @@ import time
 import traceback
 import uuid
 from datetime import timedelta
-from typing import Any, Generator, Generic, Iterable, List, Mapping, Optional, Set, Tuple, Type, TypeVar
+from typing import (
+    Any,
+    Generator,
+    Generic,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+)
 
 from airbyte_cdk import StreamSlice
 from airbyte_cdk.logger import lazy_log
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
-from airbyte_cdk.sources.declarative.async_job.job_tracker import ConcurrentJobLimitReached, JobTracker
+from airbyte_cdk.sources.declarative.async_job.job_tracker import (
+    ConcurrentJobLimitReached,
+    JobTracker,
+)
 from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
 from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
 from airbyte_cdk.sources.message import MessageRepository
@@ -36,7 +51,12 @@ class AsyncPartition:
         self._stream_slice = stream_slice
 
     def has_reached_max_attempt(self) -> bool:
-        return any(map(lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS, self._attempts_per_job.values()))
+        return any(
+            map(
+                lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS,
+                self._attempts_per_job.values(),
+            )
+        )
 
     def replace_job(self, job_to_replace: AsyncJob, new_jobs: List[AsyncJob]) -> None:
         current_attempt_count = self._attempts_per_job.pop(job_to_replace, None)
@@ -119,7 +139,12 @@ class LookaheadIterator(Generic[T]):
 
 class AsyncJobOrchestrator:
     _WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS = 5
-    _KNOWN_JOB_STATUSES = {AsyncJobStatus.COMPLETED, AsyncJobStatus.FAILED, AsyncJobStatus.RUNNING, AsyncJobStatus.TIMED_OUT}
+    _KNOWN_JOB_STATUSES = {
+        AsyncJobStatus.COMPLETED,
+        AsyncJobStatus.FAILED,
+        AsyncJobStatus.RUNNING,
+        AsyncJobStatus.TIMED_OUT,
+    }
     _RUNNING_ON_API_SIDE_STATUS = {AsyncJobStatus.RUNNING, AsyncJobStatus.TIMED_OUT}
 
     def __init__(
@@ -176,7 +201,11 @@ class AsyncJobOrchestrator:
         for partition in self._running_partitions:
             self._replace_failed_jobs(partition)
 
-        if self._has_bulk_parent and self._running_partitions and self._slice_iterator.has_next():
+        if (
+            self._has_bulk_parent
+            and self._running_partitions
+            and self._slice_iterator.has_next()
+        ):
             LOGGER.debug(
                 "This AsyncJobOrchestrator is operating as a child of a bulk stream hence we limit the number of concurrent jobs on the child until there are no more parent slices to avoid the child taking all the API job budget"
             )
@@ -192,7 +221,9 @@ class AsyncJobOrchestrator:
         if at_least_one_slice_consumed_from_slice_iterator_during_current_iteration:
             # this means a slice has been consumed but the job couldn't be create therefore we need to put it back at the beginning of the _slice_iterator
             self._slice_iterator.add_at_the_beginning(_slice)  # type: ignore # we know it's not None here because `ConcurrentJobLimitReached` happens during the for loop
-        LOGGER.debug("Waiting before creating more jobs as the limit of concurrent jobs has been reached. Will try again later...")
+        LOGGER.debug(
+            "Waiting before creating more jobs as the limit of concurrent jobs has been reached. Will try again later..."
+        )
 
     def _start_job(self, _slice: StreamSlice, previous_job_id: Optional[str] = None) -> AsyncJob:
         if previous_job_id:
@@ -212,7 +243,9 @@ class AsyncJobOrchestrator:
             raise exception
         return self._keep_api_budget_with_failed_job(_slice, exception, id_to_replace)
 
-    def _keep_api_budget_with_failed_job(self, _slice: StreamSlice, exception: Exception, intent: str) -> AsyncJob:
+    def _keep_api_budget_with_failed_job(
+        self, _slice: StreamSlice, exception: Exception, intent: str
+    ) -> AsyncJob:
         """
         We have a mechanism to retry job. It is used when a job status is FAILED or TIMED_OUT. The easiest way to retry is to have this job
         as created in a failed state and leverage the retry for failed/timed out jobs. This way, we don't have to have another process for
@@ -221,7 +254,11 @@ class AsyncJobOrchestrator:
         LOGGER.warning(
             f"Could not start job for slice {_slice}. Job will be flagged as failed and retried if max number of attempts not reached: {exception}"
         )
-        traced_exception = exception if isinstance(exception, AirbyteTracedException) else AirbyteTracedException.from_exception(exception)
+        traced_exception = (
+            exception
+            if isinstance(exception, AirbyteTracedException)
+            else AirbyteTracedException.from_exception(exception)
+        )
         # Even though we're not sure this will break the stream, we will emit here for simplicity's sake. If we wanted to be more accurate,
         # we would keep the exceptions in-memory until we know that we have reached the max attempt.
         self._message_repository.emit_message(traced_exception.as_airbyte_message())
@@ -241,7 +278,12 @@ class AsyncJobOrchestrator:
         Returns:
             Set[AsyncJob]: A set of AsyncJob objects that are currently running.
         """
-        return {job for partition in self._running_partitions for job in partition.jobs if job.status() == AsyncJobStatus.RUNNING}
+        return {
+            job
+            for partition in self._running_partitions
+            for job in partition.jobs
+            if job.status() == AsyncJobStatus.RUNNING
+        }
 
     def _update_jobs_status(self) -> None:
         """
@@ -283,14 +325,18 @@ class AsyncJobOrchestrator:
             partition (AsyncPartition): The completed partition to process.
         """
         job_ids = list(map(lambda job: job.api_job_id(), {job for job in partition.jobs}))
-        LOGGER.info(f"The following jobs for stream slice {partition.stream_slice} have been completed: {job_ids}.")
+        LOGGER.info(
+            f"The following jobs for stream slice {partition.stream_slice} have been completed: {job_ids}."
+        )
 
         # It is important to remove the jobs from the job tracker before yielding the partition as the caller might try to schedule jobs
         # but won't be able to as all jobs slots are taken even though job is done.
         for job in partition.jobs:
             self._job_tracker.remove_job(job.api_job_id())
 
-    def _process_running_partitions_and_yield_completed_ones(self) -> Generator[AsyncPartition, Any, None]:
+    def _process_running_partitions_and_yield_completed_ones(
+        self,
+    ) -> Generator[AsyncPartition, Any, None]:
         """
         Process the running partitions.
 
@@ -392,7 +438,9 @@ class AsyncJobOrchestrator:
                 self._wait_on_status_update()
             except Exception as exception:
                 if self._is_breaking_exception(exception):
-                    LOGGER.warning(f"Caught exception that stops the processing of the jobs: {exception}")
+                    LOGGER.warning(
+                        f"Caught exception that stops the processing of the jobs: {exception}"
+                    )
                     self._abort_all_running_jobs()
                     raise exception
 
@@ -406,7 +454,12 @@ class AsyncJobOrchestrator:
             # call of `create_and_get_completed_partitions` knows that there was an issue with some partitions and the sync is incomplete.
             raise AirbyteTracedException(
                 message="",
-                internal_message="\n".join([filter_secrets(exception.__repr__()) for exception in self._non_breaking_exceptions]),
+                internal_message="\n".join(
+                    [
+                        filter_secrets(exception.__repr__())
+                        for exception in self._non_breaking_exceptions
+                    ]
+                ),
                 failure_type=FailureType.config_error,
             )
 
@@ -425,7 +478,8 @@ class AsyncJobOrchestrator:
 
     def _is_breaking_exception(self, exception: Exception) -> bool:
         return isinstance(exception, self._exceptions_to_break_on) or (
-            isinstance(exception, AirbyteTracedException) and exception.failure_type == FailureType.config_error
+            isinstance(exception, AirbyteTracedException)
+            and exception.failure_type == FailureType.config_error
        )
 
     def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
```
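For orientation, a hedged sketch of how these orchestrator pieces fit together. `create_and_get_completed_partitions` and `fetch_records` are named in the hunks above, but the constructor arguments shown here are assumptions:

```python
# Hedged sketch; parameter names are guesses based on the collaborators visible
# in this diff (AsyncJobRepository, JobTracker, MessageRepository), not a
# confirmed signature.
orchestrator = AsyncJobOrchestrator(
    job_repository=my_job_repository,        # placeholder AsyncJobRepository
    slices=iter(my_stream_slices),           # placeholder iterable of StreamSlice
    job_tracker=JobTracker(limit=3),         # `limit` is an assumed argument
    message_repository=my_message_repository,
)
for partition in orchestrator.create_and_get_completed_partitions():
    for record in orchestrator.fetch_records(partition):
        ...  # emit the record downstream
```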
airbyte_cdk/sources/declarative/async_job/job_tracker.py

```diff
@@ -21,25 +21,39 @@ class JobTracker:
         self._lock = threading.Lock()
 
     def try_to_get_intent(self) -> str:
-        lazy_log(LOGGER, logging.DEBUG, lambda: f"JobTracker - Trying to acquire lock by thread {threading.get_native_id()}...")
+        lazy_log(
+            LOGGER,
+            logging.DEBUG,
+            lambda: f"JobTracker - Trying to acquire lock by thread {threading.get_native_id()}...",
+        )
         with self._lock:
             if self._has_reached_limit():
-                raise ConcurrentJobLimitReached("Can't allocate more jobs right now: limit already reached")
+                raise ConcurrentJobLimitReached(
+                    "Can't allocate more jobs right now: limit already reached"
+                )
             intent = f"intent_{str(uuid.uuid4())}"
-            lazy_log(LOGGER, logging.DEBUG, lambda: f"JobTracker - Thread {threading.get_native_id()} has acquired {intent}!")
+            lazy_log(
+                LOGGER,
+                logging.DEBUG,
+                lambda: f"JobTracker - Thread {threading.get_native_id()} has acquired {intent}!",
+            )
             self._jobs.add(intent)
             return intent
 
     def add_job(self, intent_or_job_id: str, job_id: str) -> None:
         if intent_or_job_id not in self._jobs:
-            raise ValueError(f"Can't add job: Unknown intent or job id, known values are {self._jobs}")
+            raise ValueError(
+                f"Can't add job: Unknown intent or job id, known values are {self._jobs}"
+            )
 
         if intent_or_job_id == job_id:
             # Nothing to do here as the ID to replace is the same
             return
 
         lazy_log(
-            LOGGER, logging.DEBUG, lambda: f"JobTracker - Thread {threading.get_native_id()} replacing job {intent_or_job_id} by {job_id}!"
+            LOGGER,
+            logging.DEBUG,
+            lambda: f"JobTracker - Thread {threading.get_native_id()} replacing job {intent_or_job_id} by {job_id}!",
         )
         with self._lock:
             self._jobs.add(job_id)
@@ -49,7 +63,11 @@ class JobTracker:
         """
         If the job is not allocated as a running job, this method does nothing and it won't raise.
         """
-        lazy_log(LOGGER, logging.DEBUG, lambda: f"JobTracker - Thread {threading.get_native_id()} removing job {job_id}")
+        lazy_log(
+            LOGGER,
+            logging.DEBUG,
+            lambda: f"JobTracker - Thread {threading.get_native_id()} removing job {job_id}",
+        )
         with self._lock:
             self._jobs.discard(job_id)
 
```
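The `try_to_get_intent` / `add_job` / `remove_job` calls above form a reserve, bind, release cycle. A hedged sketch; `JobTracker(limit=...)` is an assumption:

```python
# Hedged sketch of the slot lifecycle; the three method calls come from the
# code above, the constructor argument is assumed.
from airbyte_cdk.sources.declarative.async_job.job_tracker import (
    ConcurrentJobLimitReached,
    JobTracker,
)

tracker = JobTracker(limit=2)  # assumed constructor argument
try:
    intent = tracker.try_to_get_intent()  # reserve a slot, or raise
except ConcurrentJobLimitReached:
    pass  # back off and retry later, as the orchestrator above does
else:
    tracker.add_job(intent, "job_123")  # bind the slot to the API's real job id
    tracker.remove_job("job_123")       # release; a no-op for unknown ids
```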
airbyte_cdk/sources/declarative/async_job/repository.py

```diff
@@ -26,7 +26,9 @@ class AsyncJobRepository:
         Called when we need to stop on the API side. This method can raise NotImplementedError as not all the APIs will support aborting
         jobs.
         """
-        raise NotImplementedError("Either the API or the AsyncJobRepository implementation do not support aborting jobs")
+        raise NotImplementedError(
+            "Either the API or the AsyncJobRepository implementation do not support aborting jobs"
+        )
 
     @abstractmethod
     def delete(self, job: AsyncJob) -> None:
```
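Because `abort` defaults to raising, a repository for an API that does support cancellation overrides it. A hedged sketch with hypothetical endpoints; the remaining abstract methods (such as `start` or `fetch_records`) are elided here:

```python
# Hedged sketch of a concrete repository overriding the default abort above.
# The cancel/delete URLs are hypothetical placeholders.
import requests

from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository


class ExampleJobRepository(AsyncJobRepository):  # hypothetical subclass
    def abort(self, job: AsyncJob) -> None:
        # Replaces the NotImplementedError default shown above.
        requests.post(f"https://api.example.com/jobs/{job.api_job_id()}/cancel")

    def delete(self, job: AsyncJob) -> None:
        requests.delete(f"https://api.example.com/jobs/{job.api_job_id()}")
```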
airbyte_cdk/sources/declarative/auth/declarative_authenticator.py

```diff
@@ -5,7 +5,9 @@
 from dataclasses import InitVar, dataclass
 from typing import Any, Mapping, Union
 
-from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator
+from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import (
+    AbstractHeaderAuthenticator,
+)
 
 
 @dataclass
```
airbyte_cdk/sources/declarative/auth/jwt.py

```diff
@@ -75,22 +75,32 @@ class JwtAuthenticator(DeclarativeAuthenticator):
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._secret_key = InterpolatedString.create(self.secret_key, parameters=parameters)
-        self._algorithm = JwtAlgorithm(self.algorithm) if isinstance(self.algorithm, str) else self.algorithm
+        self._algorithm = (
+            JwtAlgorithm(self.algorithm) if isinstance(self.algorithm, str) else self.algorithm
+        )
         self._base64_encode_secret_key = (
             InterpolatedBoolean(self.base64_encode_secret_key, parameters=parameters)
             if isinstance(self.base64_encode_secret_key, str)
             else self.base64_encode_secret_key
         )
         self._token_duration = self.token_duration
-        self._header_prefix = InterpolatedString.create(self.header_prefix, parameters=parameters) if self.header_prefix else None
+        self._header_prefix = (
+            InterpolatedString.create(self.header_prefix, parameters=parameters)
+            if self.header_prefix
+            else None
+        )
         self._kid = InterpolatedString.create(self.kid, parameters=parameters) if self.kid else None
         self._typ = InterpolatedString.create(self.typ, parameters=parameters) if self.typ else None
         self._cty = InterpolatedString.create(self.cty, parameters=parameters) if self.cty else None
         self._iss = InterpolatedString.create(self.iss, parameters=parameters) if self.iss else None
         self._sub = InterpolatedString.create(self.sub, parameters=parameters) if self.sub else None
         self._aud = InterpolatedString.create(self.aud, parameters=parameters) if self.aud else None
-        self._additional_jwt_headers = InterpolatedMapping(self.additional_jwt_headers or {}, parameters=parameters)
-        self._additional_jwt_payload = InterpolatedMapping(self.additional_jwt_payload or {}, parameters=parameters)
+        self._additional_jwt_headers = InterpolatedMapping(
+            self.additional_jwt_headers or {}, parameters=parameters
+        )
+        self._additional_jwt_payload = InterpolatedMapping(
+            self.additional_jwt_payload or {}, parameters=parameters
+        )
 
     def _get_jwt_headers(self) -> dict[str, Any]:
         """ "
@@ -98,7 +108,9 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         """
         headers = self._additional_jwt_headers.eval(self.config)
         if any(prop in headers for prop in ["kid", "alg", "typ", "cty"]):
-            raise ValueError("'kid', 'alg', 'typ', 'cty' are reserved headers and should not be set as part of 'additional_jwt_headers'")
+            raise ValueError(
+                "'kid', 'alg', 'typ', 'cty' are reserved headers and should not be set as part of 'additional_jwt_headers'"
+            )
 
         if self._kid:
             headers["kid"] = self._kid.eval(self.config)
@@ -139,7 +151,11 @@ class JwtAuthenticator(DeclarativeAuthenticator):
         Returns the secret key used to sign the JWT.
         """
         secret_key: str = self._secret_key.eval(self.config)
-        return base64.b64encode(secret_key.encode()).decode() if self._base64_encode_secret_key else secret_key
+        return (
+            base64.b64encode(secret_key.encode()).decode()
+            if self._base64_encode_secret_key
+            else secret_key
+        )
 
     def _get_signed_token(self) -> Union[str, Any]:
         """
@@ -167,4 +183,8 @@ class JwtAuthenticator(DeclarativeAuthenticator):
 
     @property
     def token(self) -> str:
-        return f"{self._get_header_prefix()} {self._get_signed_token()}" if self._get_header_prefix() else self._get_signed_token()
+        return (
+            f"{self._get_header_prefix()} {self._get_signed_token()}"
+            if self._get_header_prefix()
+            else self._get_signed_token()
+        )
```
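A hedged construction sketch tying the wrapped `__post_init__` fields to the `token` property above. The field names come from these hunks; which fields are required, and all values shown, are assumptions:

```python
# Hedged sketch; placeholder values throughout.
from airbyte_cdk.sources.declarative.auth.jwt import JwtAuthenticator

auth = JwtAuthenticator(
    config={},
    parameters={},
    secret_key="my-signing-key",  # placeholder
    algorithm="HS256",
    token_duration=1200,
    header_prefix="Bearer",
)
# Per the property above: "<prefix> <signed JWT>" when header_prefix is set,
# otherwise just the signed token.
headers = {"Authorization": auth.token}
```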
airbyte_cdk/sources/declarative/auth/oauth.py

```diff
@@ -10,8 +10,12 @@ from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
 from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
-from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import AbstractOauth2Authenticator
-from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import SingleUseRefreshTokenOauth2Authenticator
+from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import (
+    AbstractOauth2Authenticator,
+)
+from airbyte_cdk.sources.streams.http.requests_native_auth.oauth import (
+    SingleUseRefreshTokenOauth2Authenticator,
+)
 
 
 @dataclass
@@ -57,31 +61,49 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAuthenticator):
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         super().__init__()
-        self._token_refresh_endpoint = InterpolatedString.create(self.token_refresh_endpoint, parameters=parameters)
+        self._token_refresh_endpoint = InterpolatedString.create(
+            self.token_refresh_endpoint, parameters=parameters
+        )
         self._client_id = InterpolatedString.create(self.client_id, parameters=parameters)
         self._client_secret = InterpolatedString.create(self.client_secret, parameters=parameters)
         if self.refresh_token is not None:
-            self._refresh_token: Optional[InterpolatedString] = InterpolatedString.create(self.refresh_token, parameters=parameters)
+            self._refresh_token: Optional[InterpolatedString] = InterpolatedString.create(
+                self.refresh_token, parameters=parameters
+            )
         else:
             self._refresh_token = None
-        self.access_token_name = InterpolatedString.create(self.access_token_name, parameters=parameters)
-        self.expires_in_name = InterpolatedString.create(self.expires_in_name, parameters=parameters)
+        self.access_token_name = InterpolatedString.create(
+            self.access_token_name, parameters=parameters
+        )
+        self.expires_in_name = InterpolatedString.create(
+            self.expires_in_name, parameters=parameters
+        )
         self.grant_type = InterpolatedString.create(self.grant_type, parameters=parameters)
-        self._refresh_request_body = InterpolatedMapping(self.refresh_request_body or {}, parameters=parameters)
+        self._refresh_request_body = InterpolatedMapping(
+            self.refresh_request_body or {}, parameters=parameters
+        )
         self._token_expiry_date: pendulum.DateTime = (
-            pendulum.parse(InterpolatedString.create(self.token_expiry_date, parameters=parameters).eval(self.config))  # type: ignore # pendulum.parse returns a datetime in this context
+            pendulum.parse(
+                InterpolatedString.create(self.token_expiry_date, parameters=parameters).eval(
+                    self.config
+                )
+            )  # type: ignore # pendulum.parse returns a datetime in this context
             if self.token_expiry_date
             else pendulum.now().subtract(days=1)  # type: ignore # substract does not have type hints
         )
         self._access_token: Optional[str] = None  # access_token is initialized by a setter
 
         if self.get_grant_type() == "refresh_token" and self._refresh_token is None:
-            raise ValueError("OAuthAuthenticator needs a refresh_token parameter if grant_type is set to `refresh_token`")
+            raise ValueError(
+                "OAuthAuthenticator needs a refresh_token parameter if grant_type is set to `refresh_token`"
+            )
 
     def get_token_refresh_endpoint(self) -> str:
         refresh_token: str = self._token_refresh_endpoint.eval(self.config)
         if not refresh_token:
-            raise ValueError("OAuthAuthenticator was unable to evaluate token_refresh_endpoint parameter")
+            raise ValueError(
+                "OAuthAuthenticator was unable to evaluate token_refresh_endpoint parameter"
+            )
         return refresh_token
 
     def get_client_id(self) -> str:
@@ -139,7 +161,9 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAuthenticator):
 
 
 @dataclass
-class DeclarativeSingleUseRefreshTokenOauth2Authenticator(SingleUseRefreshTokenOauth2Authenticator, DeclarativeAuthenticator):
+class DeclarativeSingleUseRefreshTokenOauth2Authenticator(
+    SingleUseRefreshTokenOauth2Authenticator, DeclarativeAuthenticator
+):
     """
     Declarative version of SingleUseRefreshTokenOauth2Authenticator which can be used in declarative connectors.
     """
```
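A hedged sketch of constructing the authenticator whose `__post_init__` is re-wrapped above; the `{{ config[...] }}` strings use the interpolation style these `InterpolatedString` fields expect, and all values are placeholders:

```python
# Hedged sketch; field names come from the hunks above, values are placeholders.
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeOauth2Authenticator

config = {"client_id": "id", "client_secret": "secret", "refresh_token": "token"}

auth = DeclarativeOauth2Authenticator(
    token_refresh_endpoint="https://api.example.com/oauth/token",
    client_id="{{ config['client_id'] }}",
    client_secret="{{ config['client_secret'] }}",
    # Omitting refresh_token while the grant type resolves to "refresh_token"
    # triggers the ValueError raised above.
    refresh_token="{{ config['refresh_token'] }}",
    config=config,
    parameters={},
)
```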
airbyte_cdk/sources/declarative/auth/selective_authenticator.py

```diff
@@ -29,7 +29,9 @@ class SelectiveAuthenticator(DeclarativeAuthenticator):
         try:
             selected_key = str(dpath.get(config, authenticator_selection_path))
         except KeyError as err:
-            raise ValueError("The path from `authenticator_selection_path` is not found in the config.") from err
+            raise ValueError(
+                "The path from `authenticator_selection_path` is not found in the config."
+            ) from err
 
         try:
             return authenticators[selected_key]
```
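A hedged sketch of the selection logic above; the two inner authenticators are placeholders and the constructor keywords are assumptions based on the names in this hunk:

```python
# Hedged sketch; api_key_auth / oauth_auth are placeholder authenticators.
from airbyte_cdk.sources.declarative.auth.selective_authenticator import (
    SelectiveAuthenticator,
)

auth = SelectiveAuthenticator(
    config={"auth_type": "api_key"},
    authenticators={"api_key": api_key_auth, "oauth": oauth_auth},
    authenticator_selection_path=["auth_type"],
    parameters={},
)
# A config missing "auth_type" raises the ValueError shown above.
```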
airbyte_cdk/sources/declarative/auth/token.py

```diff
@@ -11,7 +11,10 @@ import requests
 from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
 from airbyte_cdk.sources.declarative.auth.token_provider import TokenProvider
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
-from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType
+from airbyte_cdk.sources.declarative.requesters.request_option import (
+    RequestOption,
+    RequestOptionType,
+)
 from airbyte_cdk.sources.types import Config
 from cachetools import TTLCache, cached
 
@@ -42,7 +45,9 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
     parameters: InitVar[Mapping[str, Any]]
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._field_name = InterpolatedString.create(self.request_option.field_name, parameters=parameters)
+        self._field_name = InterpolatedString.create(
+            self.request_option.field_name, parameters=parameters
+        )
 
     @property
     def auth_header(self) -> str:
@@ -127,7 +132,9 @@ class BasicHttpAuthenticator(DeclarativeAuthenticator):
 
     @property
     def token(self) -> str:
-        auth_string = f"{self._username.eval(self.config)}:{self._password.eval(self.config)}".encode("utf8")
+        auth_string = (
+            f"{self._username.eval(self.config)}:{self._password.eval(self.config)}".encode("utf8")
+        )
         b64_encoded = base64.b64encode(auth_string).decode("utf8")
         return f"Basic {b64_encoded}"
 
@@ -164,7 +171,9 @@ def get_new_session_token(api_url: str, username: str, password: str, response_key: str) -> str:
     )
     response.raise_for_status()
     if not response.ok:
-        raise ConnectionError(f"Failed to retrieve new session token, response code {response.status_code} because {response.reason}")
+        raise ConnectionError(
+            f"Failed to retrieve new session token, response code {response.status_code} because {response.reason}"
+        )
     return str(response.json()[response_key])
 
 
@@ -208,9 +217,13 @@ class LegacySessionTokenAuthenticator(DeclarativeAuthenticator):
         self._api_url = InterpolatedString.create(self.api_url, parameters=parameters)
         self._header = InterpolatedString.create(self.header, parameters=parameters)
         self._session_token = InterpolatedString.create(self.session_token, parameters=parameters)
-        self._session_token_response_key = InterpolatedString.create(self.session_token_response_key, parameters=parameters)
+        self._session_token_response_key = InterpolatedString.create(
+            self.session_token_response_key, parameters=parameters
+        )
         self._login_url = InterpolatedString.create(self.login_url, parameters=parameters)
-        self._validate_session_url = InterpolatedString.create(self.validate_session_url, parameters=parameters)
+        self._validate_session_url = InterpolatedString.create(
+            self.validate_session_url, parameters=parameters
+        )
 
         self.logger = logging.getLogger("airbyte")
 
@@ -232,7 +245,9 @@ class LegacySessionTokenAuthenticator(DeclarativeAuthenticator):
             self.logger.info("Using generated session token by username and password")
             return get_new_session_token(api_url, username, password, session_token_response_key)
 
-        raise ConnectionError("Invalid credentials: session token is not valid or provide username and password")
+        raise ConnectionError(
+            "Invalid credentials: session token is not valid or provide username and password"
+        )
 
     def is_valid_session_token(self) -> bool:
         try:
@@ -251,4 +266,6 @@ class LegacySessionTokenAuthenticator(DeclarativeAuthenticator):
             self.logger.info("Connection check for source is successful.")
             return True
         else:
-            raise ConnectionError(f"Failed to retrieve new session token, response code {response.status_code} because {response.reason}")
+            raise ConnectionError(
+                f"Failed to retrieve new session token, response code {response.status_code} because {response.reason}"
+            )
```
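The `BasicHttpAuthenticator.token` hunk above only re-wraps the f-string; the value it computes is standard HTTP Basic encoding, shown here with placeholder credentials:

```python
# Worked example of the Basic auth header built above (placeholder credentials).
import base64

auth_string = "user@example.com:s3cret".encode("utf8")
b64_encoded = base64.b64encode(auth_string).decode("utf8")
print(f"Basic {b64_encoded}")  # Basic dXNlckBleGFtcGxlLmNvbTpzM2NyZXQ=
```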
airbyte_cdk/sources/declarative/checks/check_stream.py

```diff
@@ -27,22 +27,30 @@ class CheckStream(ConnectionChecker):
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
 
-    def check_connection(self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
+    def check_connection(
+        self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any]
+    ) -> Tuple[bool, Any]:
         streams = source.streams(config=config)
         stream_name_to_stream = {s.name: s for s in streams}
         if len(streams) == 0:
             return False, f"No streams to connect to from source {source}"
         for stream_name in self.stream_names:
             if stream_name not in stream_name_to_stream.keys():
-                raise ValueError(f"{stream_name} is not part of the catalog. Expected one of {stream_name_to_stream.keys()}.")
+                raise ValueError(
+                    f"{stream_name} is not part of the catalog. Expected one of {stream_name_to_stream.keys()}."
+                )
 
             stream = stream_name_to_stream[stream_name]
             availability_strategy = HttpAvailabilityStrategy()
             try:
-                stream_is_available, reason = availability_strategy.check_availability(stream, logger)
+                stream_is_available, reason = availability_strategy.check_availability(
+                    stream, logger
+                )
                 if not stream_is_available:
                     return False, reason
             except Exception as error:
-                logger.error(f"Encountered an error trying to connect to stream {stream_name}. Error: \n {traceback.format_exc()}")
+                logger.error(
+                    f"Encountered an error trying to connect to stream {stream_name}. Error: \n {traceback.format_exc()}"
+                )
                 return False, f"Unable to connect to stream {stream_name} - {error}"
         return True, None
```
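Finally, a hedged sketch of invoking the checker through the re-wrapped `check_connection` signature; `source` and `config` stand in for a concrete `AbstractSource` and its configuration:

```python
# Hedged sketch using the signature shown above; source/config are placeholders.
import logging

from airbyte_cdk.sources.declarative.checks.check_stream import CheckStream

checker = CheckStream(stream_names=["users"], parameters={})
ok, reason = checker.check_connection(source, logging.getLogger("airbyte"), config)
if not ok:
    print(f"Connection check failed: {reason}")
```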