airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +7 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +656 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +782 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/METADATA +7 -6
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/RECORD +198 -198
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/WHEEL +0 -0
@@ -38,7 +38,11 @@ class InterpolatedMapping:
|
|
38
38
|
valid_value_types = additional_parameters.pop("valid_value_types", None)
|
39
39
|
return {
|
40
40
|
self._interpolation.eval(
|
41
|
-
name,
|
41
|
+
name,
|
42
|
+
config,
|
43
|
+
valid_types=valid_key_types,
|
44
|
+
parameters=self._parameters,
|
45
|
+
**additional_parameters,
|
42
46
|
): self._eval(value, config, valid_types=valid_value_types, **additional_parameters)
|
43
47
|
for name, value in self.mapping.items()
|
44
48
|
}
|
@@ -9,7 +9,9 @@ from typing import Any, Mapping, Optional, Union
|
|
9
9
|
from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
|
10
10
|
from airbyte_cdk.sources.types import Config
|
11
11
|
|
12
|
-
NestedMappingEntry = Union[
|
12
|
+
NestedMappingEntry = Union[
|
13
|
+
dict[str, "NestedMapping"], list["NestedMapping"], str, int, float, bool, None
|
14
|
+
]
|
13
15
|
NestedMapping = Union[dict[str, NestedMappingEntry], str]
|
14
16
|
|
15
17
|
|
@@ -32,12 +34,17 @@ class InterpolatedNestedMapping:
|
|
32
34
|
def eval(self, config: Config, **additional_parameters: Any) -> Any:
|
33
35
|
return self._eval(self.mapping, config, **additional_parameters)
|
34
36
|
|
35
|
-
def _eval(
|
37
|
+
def _eval(
|
38
|
+
self, value: Union[NestedMapping, NestedMappingEntry], config: Config, **kwargs: Any
|
39
|
+
) -> Any:
|
36
40
|
# Recursively interpolate dictionaries and lists
|
37
41
|
if isinstance(value, str):
|
38
42
|
return self._interpolation.eval(value, config, parameters=self._parameters, **kwargs)
|
39
43
|
elif isinstance(value, dict):
|
40
|
-
interpolated_dict = {
|
44
|
+
interpolated_dict = {
|
45
|
+
self._eval(k, config, **kwargs): self._eval(v, config, **kwargs)
|
46
|
+
for k, v in value.items()
|
47
|
+
}
|
41
48
|
return {k: v for k, v in interpolated_dict.items() if v is not None}
|
42
49
|
elif isinstance(value, list):
|
43
50
|
return [self._eval(v, config, **kwargs) for v in value]
|
@@ -45,10 +45,14 @@ class InterpolatedString:
|
|
45
45
|
if self._is_plain_string is None:
|
46
46
|
# Let's check whether output from evaluation is the same as input.
|
47
47
|
# This indicates occurrence of a plain string, not a template and we can skip Jinja in subsequent runs.
|
48
|
-
evaluated = self._interpolation.eval(
|
48
|
+
evaluated = self._interpolation.eval(
|
49
|
+
self.string, config, self.default, parameters=self._parameters, **kwargs
|
50
|
+
)
|
49
51
|
self._is_plain_string = self.string == evaluated
|
50
52
|
return evaluated
|
51
|
-
return self._interpolation.eval(
|
53
|
+
return self._interpolation.eval(
|
54
|
+
self.string, config, self.default, parameters=self._parameters, **kwargs
|
55
|
+
)
|
52
56
|
|
53
57
|
def __eq__(self, other: Any) -> bool:
|
54
58
|
if not isinstance(other, InterpolatedString):
|
@@ -14,7 +14,13 @@ class Interpolation(ABC):
|
|
14
14
|
"""
|
15
15
|
|
16
16
|
@abstractmethod
|
17
|
-
def eval(
|
17
|
+
def eval(
|
18
|
+
self,
|
19
|
+
input_str: str,
|
20
|
+
config: Config,
|
21
|
+
default: Optional[str] = None,
|
22
|
+
**additional_options: Any,
|
23
|
+
) -> Any:
|
18
24
|
"""
|
19
25
|
Interpolates the input string using the config, and additional options passed as parameter.
|
20
26
|
|
@@ -61,7 +61,9 @@ class JinjaInterpolation(Interpolation):
|
|
61
61
|
# By default, these Python builtin functions are available in the Jinja context.
|
62
62
|
# We explicitely remove them because of the potential security risk.
|
63
63
|
# Please add a unit test to test_jinja.py when adding a restriction.
|
64
|
-
RESTRICTED_BUILTIN_FUNCTIONS = [
|
64
|
+
RESTRICTED_BUILTIN_FUNCTIONS = [
|
65
|
+
"range"
|
66
|
+
] # The range function can cause very expensive computations
|
65
67
|
|
66
68
|
def __init__(self) -> None:
|
67
69
|
self._environment = StreamPartitionAccessEnvironment()
|
@@ -119,7 +121,9 @@ class JinjaInterpolation(Interpolation):
|
|
119
121
|
undeclared = self._find_undeclared_variables(s)
|
120
122
|
undeclared_not_in_context = {var for var in undeclared if var not in context}
|
121
123
|
if undeclared_not_in_context:
|
122
|
-
raise ValueError(
|
124
|
+
raise ValueError(
|
125
|
+
f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}"
|
126
|
+
)
|
123
127
|
return self._compile(s).render(context) # type: ignore # from_string is able to handle None
|
124
128
|
except TypeError:
|
125
129
|
# The string is a static value, not a jinja template
|
@@ -104,7 +104,9 @@ def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
|
|
104
104
|
:param num_days: number of days to add to current date time
|
105
105
|
:return: datetime formatted as RFC3339
|
106
106
|
"""
|
107
|
-
return (
|
107
|
+
return (
|
108
|
+
datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=num_days)
|
109
|
+
).strftime(format)
|
108
110
|
|
109
111
|
|
110
112
|
def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
@@ -117,7 +119,9 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]:
|
|
117
119
|
return parse_duration(datestring) # type: ignore # mypy thinks this returns Any for some reason
|
118
120
|
|
119
121
|
|
120
|
-
def format_datetime(
|
122
|
+
def format_datetime(
|
123
|
+
dt: Union[str, datetime.datetime], format: str, input_format: Optional[str] = None
|
124
|
+
) -> str:
|
121
125
|
"""
|
122
126
|
Converts datetime to another format
|
123
127
|
|
@@ -130,11 +134,22 @@ def format_datetime(dt: Union[str, datetime.datetime], format: str, input_format
|
|
130
134
|
"""
|
131
135
|
if isinstance(dt, datetime.datetime):
|
132
136
|
return dt.strftime(format)
|
133
|
-
dt_datetime =
|
137
|
+
dt_datetime = (
|
138
|
+
datetime.datetime.strptime(dt, input_format) if input_format else _str_to_datetime(dt)
|
139
|
+
)
|
134
140
|
if format == "%s":
|
135
141
|
return str(int(dt_datetime.timestamp()))
|
136
142
|
return dt_datetime.strftime(format)
|
137
143
|
|
138
144
|
|
139
|
-
_macros_list = [
|
145
|
+
_macros_list = [
|
146
|
+
now_utc,
|
147
|
+
today_utc,
|
148
|
+
timestamp,
|
149
|
+
max,
|
150
|
+
day_delta,
|
151
|
+
duration,
|
152
|
+
format_datetime,
|
153
|
+
today_with_timezone,
|
154
|
+
]
|
140
155
|
macros = {f.__name__: f for f in _macros_list}
|
@@ -20,16 +20,30 @@ from airbyte_cdk.models import (
|
|
20
20
|
)
|
21
21
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
22
22
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
23
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
24
|
-
|
23
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
24
|
+
CheckStream as CheckStreamModel,
|
25
|
+
)
|
26
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
27
|
+
DeclarativeStream as DeclarativeStreamModel,
|
28
|
+
)
|
25
29
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
26
|
-
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import
|
27
|
-
|
28
|
-
|
30
|
+
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
|
31
|
+
ManifestComponentTransformer,
|
32
|
+
)
|
33
|
+
from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import (
|
34
|
+
ManifestReferenceResolver,
|
35
|
+
)
|
36
|
+
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
37
|
+
ModelToComponentFactory,
|
38
|
+
)
|
29
39
|
from airbyte_cdk.sources.message import MessageRepository
|
30
40
|
from airbyte_cdk.sources.streams.core import Stream
|
31
41
|
from airbyte_cdk.sources.types import ConnectionDefinition
|
32
|
-
from airbyte_cdk.sources.utils.slice_logger import
|
42
|
+
from airbyte_cdk.sources.utils.slice_logger import (
|
43
|
+
AlwaysLogSliceLogger,
|
44
|
+
DebugSliceLogger,
|
45
|
+
SliceLogger,
|
46
|
+
)
|
33
47
|
from jsonschema.exceptions import ValidationError
|
34
48
|
from jsonschema.validators import validate
|
35
49
|
|
@@ -57,13 +71,21 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
57
71
|
manifest["type"] = "DeclarativeSource"
|
58
72
|
|
59
73
|
resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
|
60
|
-
propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
|
74
|
+
propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
|
75
|
+
"", resolved_source_config, {}
|
76
|
+
)
|
61
77
|
self._source_config = propagated_source_config
|
62
78
|
self._debug = debug
|
63
79
|
self._emit_connector_builder_messages = emit_connector_builder_messages
|
64
|
-
self._constructor =
|
80
|
+
self._constructor = (
|
81
|
+
component_factory
|
82
|
+
if component_factory
|
83
|
+
else ModelToComponentFactory(emit_connector_builder_messages)
|
84
|
+
)
|
65
85
|
self._message_repository = self._constructor.get_message_repository()
|
66
|
-
self._slice_logger: SliceLogger =
|
86
|
+
self._slice_logger: SliceLogger = (
|
87
|
+
AlwaysLogSliceLogger() if emit_connector_builder_messages else DebugSliceLogger()
|
88
|
+
)
|
67
89
|
|
68
90
|
self._validate_source()
|
69
91
|
|
@@ -81,20 +103,30 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
81
103
|
if "type" not in check:
|
82
104
|
check["type"] = "CheckStream"
|
83
105
|
check_stream = self._constructor.create_component(
|
84
|
-
CheckStreamModel,
|
106
|
+
CheckStreamModel,
|
107
|
+
check,
|
108
|
+
dict(),
|
109
|
+
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
85
110
|
)
|
86
111
|
if isinstance(check_stream, ConnectionChecker):
|
87
112
|
return check_stream
|
88
113
|
else:
|
89
|
-
raise ValueError(
|
114
|
+
raise ValueError(
|
115
|
+
f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
|
116
|
+
)
|
90
117
|
|
91
118
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
92
|
-
self._emit_manifest_debug_message(
|
119
|
+
self._emit_manifest_debug_message(
|
120
|
+
extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)}
|
121
|
+
)
|
93
122
|
stream_configs = self._stream_configs(self._source_config)
|
94
123
|
|
95
124
|
source_streams = [
|
96
125
|
self._constructor.create_component(
|
97
|
-
DeclarativeStreamModel,
|
126
|
+
DeclarativeStreamModel,
|
127
|
+
stream_config,
|
128
|
+
config,
|
129
|
+
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
98
130
|
)
|
99
131
|
for stream_config in self._initialize_cache_for_parent_streams(deepcopy(stream_configs))
|
100
132
|
]
|
@@ -102,7 +134,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
102
134
|
return source_streams
|
103
135
|
|
104
136
|
@staticmethod
|
105
|
-
def _initialize_cache_for_parent_streams(
|
137
|
+
def _initialize_cache_for_parent_streams(
|
138
|
+
stream_configs: List[Dict[str, Any]],
|
139
|
+
) -> List[Dict[str, Any]]:
|
106
140
|
parent_streams = set()
|
107
141
|
|
108
142
|
def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None:
|
@@ -113,12 +147,16 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
113
147
|
for stream_config in stream_configs:
|
114
148
|
if stream_config.get("incremental_sync", {}).get("parent_stream"):
|
115
149
|
parent_streams.add(stream_config["incremental_sync"]["parent_stream"]["name"])
|
116
|
-
stream_config["incremental_sync"]["parent_stream"]["retriever"]["requester"][
|
150
|
+
stream_config["incremental_sync"]["parent_stream"]["retriever"]["requester"][
|
151
|
+
"use_cache"
|
152
|
+
] = True
|
117
153
|
|
118
154
|
elif stream_config.get("retriever", {}).get("partition_router", {}):
|
119
155
|
partition_router = stream_config["retriever"]["partition_router"]
|
120
156
|
|
121
|
-
if isinstance(partition_router, dict) and partition_router.get(
|
157
|
+
if isinstance(partition_router, dict) and partition_router.get(
|
158
|
+
"parent_stream_configs"
|
159
|
+
):
|
122
160
|
update_with_cache_parent_configs(partition_router["parent_stream_configs"])
|
123
161
|
elif isinstance(partition_router, list):
|
124
162
|
for router in partition_router:
|
@@ -139,7 +177,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
139
177
|
in the project root.
|
140
178
|
"""
|
141
179
|
self._configure_logger_level(logger)
|
142
|
-
self._emit_manifest_debug_message(
|
180
|
+
self._emit_manifest_debug_message(
|
181
|
+
extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)}
|
182
|
+
)
|
143
183
|
|
144
184
|
spec = self._source_config.get("spec")
|
145
185
|
if spec:
|
@@ -176,22 +216,62 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
176
216
|
Validates the connector manifest against the declarative component schema
|
177
217
|
"""
|
178
218
|
try:
|
179
|
-
raw_component_schema = pkgutil.get_data(
|
219
|
+
raw_component_schema = pkgutil.get_data(
|
220
|
+
"airbyte_cdk", "sources/declarative/declarative_component_schema.yaml"
|
221
|
+
)
|
180
222
|
if raw_component_schema is not None:
|
181
|
-
declarative_component_schema = yaml.load(
|
223
|
+
declarative_component_schema = yaml.load(
|
224
|
+
raw_component_schema, Loader=yaml.SafeLoader
|
225
|
+
)
|
182
226
|
else:
|
183
|
-
raise RuntimeError(
|
227
|
+
raise RuntimeError(
|
228
|
+
"Failed to read manifest component json schema required for validation"
|
229
|
+
)
|
184
230
|
except FileNotFoundError as e:
|
185
|
-
raise FileNotFoundError(
|
231
|
+
raise FileNotFoundError(
|
232
|
+
f"Failed to read manifest component json schema required for validation: {e}"
|
233
|
+
)
|
186
234
|
|
187
235
|
streams = self._source_config.get("streams")
|
188
236
|
if not streams:
|
189
|
-
raise ValidationError(
|
237
|
+
raise ValidationError(
|
238
|
+
f"A valid manifest should have at least one stream defined. Got {streams}"
|
239
|
+
)
|
190
240
|
|
191
241
|
try:
|
192
242
|
validate(self._source_config, declarative_component_schema)
|
193
243
|
except ValidationError as e:
|
194
|
-
raise ValidationError(
|
244
|
+
raise ValidationError(
|
245
|
+
"Validation against json schema defined in declarative_component_schema.yaml schema failed"
|
246
|
+
) from e
|
247
|
+
|
248
|
+
cdk_version = metadata.version("airbyte_cdk")
|
249
|
+
cdk_major, cdk_minor, cdk_patch = self._get_version_parts(cdk_version, "airbyte-cdk")
|
250
|
+
manifest_version = self._source_config.get("version")
|
251
|
+
if manifest_version is None:
|
252
|
+
raise RuntimeError(
|
253
|
+
"Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support."
|
254
|
+
)
|
255
|
+
manifest_major, manifest_minor, manifest_patch = self._get_version_parts(
|
256
|
+
manifest_version, "manifest"
|
257
|
+
)
|
258
|
+
|
259
|
+
if cdk_version.startswith("0.0.0"):
|
260
|
+
# Skipping version compatibility check on unreleased dev branch
|
261
|
+
pass
|
262
|
+
elif cdk_major < manifest_major or (
|
263
|
+
cdk_major == manifest_major and cdk_minor < manifest_minor
|
264
|
+
):
|
265
|
+
raise ValidationError(
|
266
|
+
f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your "
|
267
|
+
f"manifest may contain features that are not in the current CDK version."
|
268
|
+
)
|
269
|
+
elif manifest_major == 0 and manifest_minor < 29:
|
270
|
+
raise ValidationError(
|
271
|
+
f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the "
|
272
|
+
f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version "
|
273
|
+
f"{cdk_version} which contains these breaking changes."
|
274
|
+
)
|
195
275
|
|
196
276
|
@staticmethod
|
197
277
|
def _get_version_parts(version: str, version_type: str) -> Tuple[int, int, int]:
|
@@ -200,7 +280,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
200
280
|
"""
|
201
281
|
version_parts = re.split(r"\.", version)
|
202
282
|
if len(version_parts) != 3 or not all([part.isdigit() for part in version_parts]):
|
203
|
-
raise ValidationError(
|
283
|
+
raise ValidationError(
|
284
|
+
f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)"
|
285
|
+
)
|
204
286
|
return tuple(int(part) for part in version_parts) # type: ignore # We already verified there were 3 parts and they are all digits
|
205
287
|
|
206
288
|
def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
|
@@ -43,7 +43,9 @@ class LegacyToPerPartitionStateMigration(StateMigration):
|
|
43
43
|
self._partition_key_field = InterpolatedString.create(
|
44
44
|
self._get_partition_field(self._partition_router), parameters=self._parameters
|
45
45
|
).eval(self._config)
|
46
|
-
self._cursor_field = InterpolatedString.create(
|
46
|
+
self._cursor_field = InterpolatedString.create(
|
47
|
+
self._cursor.cursor_field, parameters=self._parameters
|
48
|
+
).eval(self._config)
|
47
49
|
|
48
50
|
def _get_partition_field(self, partition_router: SubstreamPartitionRouter) -> str:
|
49
51
|
parent_stream_config = partition_router.parent_stream_configs[0]
|
@@ -85,5 +87,8 @@ class LegacyToPerPartitionStateMigration(StateMigration):
|
|
85
87
|
return True
|
86
88
|
|
87
89
|
def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
|
88
|
-
states = [
|
90
|
+
states = [
|
91
|
+
{"partition": {self._partition_key_field: key}, "cursor": value}
|
92
|
+
for key, value in stream_state.items()
|
93
|
+
]
|
89
94
|
return {"states": states}
|