airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -15,7 +15,9 @@ class ConnectionChecker(ABC):
|
|
15
15
|
"""
|
16
16
|
|
17
17
|
@abstractmethod
|
18
|
-
def check_connection(
|
18
|
+
def check_connection(
|
19
|
+
self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any]
|
20
|
+
) -> Tuple[bool, Any]:
|
19
21
|
"""
|
20
22
|
Tests if the input configuration can be used to successfully connect to the integration e.g: if a provided Stripe API token can be used to connect
|
21
23
|
to the Stripe API.
|
@@ -28,15 +28,23 @@ class ConcurrencyLevel:
|
|
28
28
|
if isinstance(self.default_concurrency, int):
|
29
29
|
self._default_concurrency: Union[int, InterpolatedString] = self.default_concurrency
|
30
30
|
elif "config" in self.default_concurrency and not self.max_concurrency:
|
31
|
-
raise ValueError(
|
31
|
+
raise ValueError(
|
32
|
+
"ConcurrencyLevel requires that max_concurrency be defined if the default_concurrency can be used-specified"
|
33
|
+
)
|
32
34
|
else:
|
33
|
-
self._default_concurrency = InterpolatedString.create(
|
35
|
+
self._default_concurrency = InterpolatedString.create(
|
36
|
+
self.default_concurrency, parameters=parameters
|
37
|
+
)
|
34
38
|
|
35
39
|
def get_concurrency_level(self) -> int:
|
36
40
|
if isinstance(self._default_concurrency, InterpolatedString):
|
37
41
|
evaluated_default_concurrency = self._default_concurrency.eval(config=self.config)
|
38
42
|
if not isinstance(evaluated_default_concurrency, int):
|
39
43
|
raise ValueError("default_concurrency did not evaluate to an integer")
|
40
|
-
return
|
44
|
+
return (
|
45
|
+
min(evaluated_default_concurrency, self.max_concurrency)
|
46
|
+
if self.max_concurrency
|
47
|
+
else evaluated_default_concurrency
|
48
|
+
)
|
41
49
|
else:
|
42
50
|
return self._default_concurrency
|
@@ -3,20 +3,34 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any,
|
7
|
-
|
8
|
-
from airbyte_cdk.models import
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union
|
7
|
+
|
8
|
+
from airbyte_cdk.models import (
|
9
|
+
AirbyteCatalog,
|
10
|
+
AirbyteMessage,
|
11
|
+
AirbyteStateMessage,
|
12
|
+
ConfiguredAirbyteCatalog,
|
13
|
+
)
|
9
14
|
from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
|
10
15
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
11
16
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
12
17
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
13
18
|
from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
19
|
+
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
|
+
ClientSideIncrementalRecordFilterDecorator,
|
21
|
+
)
|
14
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
15
23
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
16
24
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
17
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
18
|
-
|
19
|
-
|
25
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
26
|
+
ConcurrencyLevel as ConcurrencyLevelModel,
|
27
|
+
)
|
28
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
|
+
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
30
|
+
)
|
31
|
+
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
32
|
+
ModelToComponentFactory,
|
33
|
+
)
|
20
34
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
21
35
|
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
22
36
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
|
@@ -25,13 +39,14 @@ from airbyte_cdk.sources.source import TState
|
|
25
39
|
from airbyte_cdk.sources.streams import Stream
|
26
40
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
27
41
|
from airbyte_cdk.sources.streams.concurrent.adapters import CursorPartitionGenerator
|
28
|
-
from airbyte_cdk.sources.streams.concurrent.availability_strategy import
|
42
|
+
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
43
|
+
AlwaysAvailableAvailabilityStrategy,
|
44
|
+
)
|
29
45
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
30
46
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
31
47
|
|
32
48
|
|
33
49
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
34
|
-
|
35
50
|
# By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
|
36
51
|
SINGLE_THREADED_CONCURRENCY_LEVEL = 1
|
37
52
|
|
@@ -63,7 +78,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
63
78
|
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
|
64
79
|
# for our future improvements to the CDK.
|
65
80
|
if config:
|
66
|
-
self._concurrent_streams, self._synchronous_streams = self._group_streams(
|
81
|
+
self._concurrent_streams, self._synchronous_streams = self._group_streams(
|
82
|
+
config=config or {}
|
83
|
+
)
|
67
84
|
else:
|
68
85
|
self._concurrent_streams = None
|
69
86
|
self._synchronous_streams = None
|
@@ -71,10 +88,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
71
88
|
concurrency_level_from_manifest = self._source_config.get("concurrency_level")
|
72
89
|
if concurrency_level_from_manifest:
|
73
90
|
concurrency_level_component = self._constructor.create_component(
|
74
|
-
model_type=ConcurrencyLevelModel,
|
91
|
+
model_type=ConcurrencyLevelModel,
|
92
|
+
component_definition=concurrency_level_from_manifest,
|
93
|
+
config=config or {},
|
75
94
|
)
|
76
95
|
if not isinstance(concurrency_level_component, ConcurrencyLevel):
|
77
|
-
raise ValueError(
|
96
|
+
raise ValueError(
|
97
|
+
f"Expected to generate a ConcurrencyLevel component, but received {concurrency_level_component.__class__}"
|
98
|
+
)
|
78
99
|
|
79
100
|
concurrency_level = concurrency_level_component.get_concurrency_level()
|
80
101
|
initial_number_of_partitions_to_generate = max(
|
@@ -99,13 +120,16 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
99
120
|
catalog: ConfiguredAirbyteCatalog,
|
100
121
|
state: Optional[Union[List[AirbyteStateMessage]]] = None,
|
101
122
|
) -> Iterator[AirbyteMessage]:
|
102
|
-
|
103
123
|
# ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
|
104
124
|
# streams must be saved so that they can be removed from the catalog before starting synchronous streams
|
105
125
|
if self._concurrent_streams:
|
106
|
-
concurrent_stream_names = set(
|
126
|
+
concurrent_stream_names = set(
|
127
|
+
[concurrent_stream.name for concurrent_stream in self._concurrent_streams]
|
128
|
+
)
|
107
129
|
|
108
|
-
selected_concurrent_streams = self._select_streams(
|
130
|
+
selected_concurrent_streams = self._select_streams(
|
131
|
+
streams=self._concurrent_streams, configured_catalog=catalog
|
132
|
+
)
|
109
133
|
# It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
|
110
134
|
# This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
|
111
135
|
if selected_concurrent_streams:
|
@@ -125,7 +149,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
125
149
|
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
126
150
|
concurrent_streams = self._concurrent_streams or []
|
127
151
|
synchronous_streams = self._synchronous_streams or []
|
128
|
-
return AirbyteCatalog(
|
152
|
+
return AirbyteCatalog(
|
153
|
+
streams=[
|
154
|
+
stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
|
155
|
+
]
|
156
|
+
)
|
129
157
|
|
130
158
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
131
159
|
"""
|
@@ -138,25 +166,34 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
138
166
|
"""
|
139
167
|
return super().streams(config)
|
140
168
|
|
141
|
-
def _group_streams(
|
169
|
+
def _group_streams(
|
170
|
+
self, config: Mapping[str, Any]
|
171
|
+
) -> Tuple[List[AbstractStream], List[Stream]]:
|
142
172
|
concurrent_streams: List[AbstractStream] = []
|
143
173
|
synchronous_streams: List[Stream] = []
|
144
174
|
|
145
175
|
state_manager = ConnectorStateManager(state=self._state) # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
|
146
176
|
|
147
|
-
name_to_stream_mapping = {
|
177
|
+
name_to_stream_mapping = {
|
178
|
+
stream["name"]: stream for stream in self.resolved_manifest["streams"]
|
179
|
+
}
|
148
180
|
|
149
181
|
for declarative_stream in self.streams(config=config):
|
150
182
|
# Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
|
151
183
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
152
184
|
# so we need to treat them as synchronous
|
153
185
|
if isinstance(declarative_stream, DeclarativeStream):
|
154
|
-
datetime_based_cursor_component_definition = name_to_stream_mapping[
|
186
|
+
datetime_based_cursor_component_definition = name_to_stream_mapping[
|
187
|
+
declarative_stream.name
|
188
|
+
].get("incremental_sync")
|
155
189
|
|
156
190
|
if (
|
157
191
|
datetime_based_cursor_component_definition
|
158
|
-
and datetime_based_cursor_component_definition.get("type", "")
|
159
|
-
|
192
|
+
and datetime_based_cursor_component_definition.get("type", "")
|
193
|
+
== DatetimeBasedCursorModel.__name__
|
194
|
+
and self._stream_supports_concurrent_partition_processing(
|
195
|
+
declarative_stream=declarative_stream
|
196
|
+
)
|
160
197
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
161
198
|
and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
162
199
|
):
|
@@ -164,30 +201,38 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
164
201
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
165
202
|
)
|
166
203
|
|
167
|
-
cursor, connector_state_converter =
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
204
|
+
cursor, connector_state_converter = (
|
205
|
+
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
206
|
+
state_manager=state_manager,
|
207
|
+
model_type=DatetimeBasedCursorModel,
|
208
|
+
component_definition=datetime_based_cursor_component_definition,
|
209
|
+
stream_name=declarative_stream.name,
|
210
|
+
stream_namespace=declarative_stream.namespace,
|
211
|
+
config=config or {},
|
212
|
+
stream_state=stream_state,
|
213
|
+
)
|
175
214
|
)
|
176
215
|
|
177
216
|
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
178
217
|
# low-code framework because state management is handled through the ConcurrentCursor.
|
179
|
-
if
|
218
|
+
if (
|
219
|
+
declarative_stream
|
220
|
+
and declarative_stream.retriever
|
221
|
+
and isinstance(declarative_stream.retriever, SimpleRetriever)
|
222
|
+
):
|
180
223
|
# Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
|
181
224
|
# called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
|
182
225
|
# for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
|
183
226
|
# ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
|
184
227
|
# with state.
|
185
228
|
if declarative_stream.retriever.cursor:
|
186
|
-
declarative_stream.retriever.cursor.set_initial_state(
|
229
|
+
declarative_stream.retriever.cursor.set_initial_state(
|
230
|
+
stream_state=stream_state
|
231
|
+
)
|
187
232
|
declarative_stream.retriever.cursor = None
|
188
233
|
|
189
234
|
partition_generator = CursorPartitionGenerator(
|
190
|
-
|
235
|
+
stream=declarative_stream,
|
191
236
|
message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory
|
192
237
|
cursor=cursor,
|
193
238
|
connector_state_converter=connector_state_converter,
|
@@ -214,7 +259,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
214
259
|
|
215
260
|
return concurrent_streams, synchronous_streams
|
216
261
|
|
217
|
-
def _stream_supports_concurrent_partition_processing(
|
262
|
+
def _stream_supports_concurrent_partition_processing(
|
263
|
+
self, declarative_stream: DeclarativeStream
|
264
|
+
) -> bool:
|
218
265
|
"""
|
219
266
|
Many connectors make use of stream_state during interpolation on a per-partition basis under the assumption that
|
220
267
|
state is updated sequentially. Because the concurrent CDK engine processes different partitions in parallel,
|
@@ -226,7 +273,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
226
273
|
cdk-migrations.md for the full list of connectors.
|
227
274
|
"""
|
228
275
|
|
229
|
-
if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
|
276
|
+
if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
|
277
|
+
declarative_stream.retriever.requester, HttpRequester
|
278
|
+
):
|
230
279
|
http_requester = declarative_stream.retriever.requester
|
231
280
|
if "stream_state" in http_requester._path.string:
|
232
281
|
self.logger.warning(
|
@@ -243,14 +292,22 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
243
292
|
|
244
293
|
record_selector = declarative_stream.retriever.record_selector
|
245
294
|
if isinstance(record_selector, RecordSelector):
|
246
|
-
if
|
295
|
+
if (
|
296
|
+
record_selector.record_filter
|
297
|
+
and not isinstance(
|
298
|
+
record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
|
299
|
+
)
|
300
|
+
and "stream_state" in record_selector.record_filter.condition
|
301
|
+
):
|
247
302
|
self.logger.warning(
|
248
303
|
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the RecordFilter which is not thread-safe. Defaulting to synchronous processing"
|
249
304
|
)
|
250
305
|
return False
|
251
306
|
|
252
307
|
for add_fields in [
|
253
|
-
transformation
|
308
|
+
transformation
|
309
|
+
for transformation in record_selector.transformations
|
310
|
+
if isinstance(transformation, AddFields)
|
254
311
|
]:
|
255
312
|
for field in add_fields.fields:
|
256
313
|
if isinstance(field.value, str) and "stream_state" in field.value:
|
@@ -258,7 +315,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
258
315
|
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
|
259
316
|
)
|
260
317
|
return False
|
261
|
-
if
|
318
|
+
if (
|
319
|
+
isinstance(field.value, InterpolatedString)
|
320
|
+
and "stream_state" in field.value.string
|
321
|
+
):
|
262
322
|
self.logger.warning(
|
263
323
|
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
|
264
324
|
)
|
@@ -266,7 +326,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
266
326
|
return True
|
267
327
|
|
268
328
|
@staticmethod
|
269
|
-
def _select_streams(
|
329
|
+
def _select_streams(
|
330
|
+
streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
|
331
|
+
) -> List[AbstractStream]:
|
270
332
|
stream_name_to_instance: Mapping[str, AbstractStream] = {s.name: s for s in streams}
|
271
333
|
abstract_streams: List[AbstractStream] = []
|
272
334
|
for configured_stream in configured_catalog.streams:
|
@@ -281,16 +343,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
281
343
|
catalog: ConfiguredAirbyteCatalog,
|
282
344
|
concurrent_stream_names: set[str],
|
283
345
|
) -> ConfiguredAirbyteCatalog:
|
284
|
-
return ConfiguredAirbyteCatalog(
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
def _create_new_stream() -> Stream:
|
292
|
-
streams_with_same_name = list(filter(lambda stream: stream.name == stream_to_copy.name, self.streams(config)))
|
293
|
-
if len(streams_with_same_name) == 1:
|
294
|
-
return streams_with_same_name[0]
|
295
|
-
raise ValueError(f"Expected one stream with name `{stream_to_copy.name}` but got {len(streams_with_same_name)}")
|
296
|
-
return _create_new_stream
|
346
|
+
return ConfiguredAirbyteCatalog(
|
347
|
+
streams=[
|
348
|
+
stream
|
349
|
+
for stream in catalog.streams
|
350
|
+
if stream.stream.name not in concurrent_stream_names
|
351
|
+
]
|
352
|
+
)
|
@@ -32,7 +32,7 @@ class MinMaxDatetime:
|
|
32
32
|
parameters: InitVar[Mapping[str, Any]]
|
33
33
|
# datetime_format is a unique case where we inherit it from the parent if it is not specified before using the default value
|
34
34
|
# which is why we need dedicated getter/setter methods and private dataclass field
|
35
|
-
datetime_format: str
|
35
|
+
datetime_format: str
|
36
36
|
_datetime_format: str = field(init=False, repr=False, default="")
|
37
37
|
min_datetime: Union[InterpolatedString, str] = ""
|
38
38
|
max_datetime: Union[InterpolatedString, str] = ""
|
@@ -40,10 +40,20 @@ class MinMaxDatetime:
|
|
40
40
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
41
41
|
self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {})
|
42
42
|
self._parser = DatetimeParser()
|
43
|
-
self.min_datetime =
|
44
|
-
|
43
|
+
self.min_datetime = (
|
44
|
+
InterpolatedString.create(self.min_datetime, parameters=parameters)
|
45
|
+
if self.min_datetime
|
46
|
+
else None
|
47
|
+
) # type: ignore
|
48
|
+
self.max_datetime = (
|
49
|
+
InterpolatedString.create(self.max_datetime, parameters=parameters)
|
50
|
+
if self.max_datetime
|
51
|
+
else None
|
52
|
+
) # type: ignore
|
45
53
|
|
46
|
-
def get_datetime(
|
54
|
+
def get_datetime(
|
55
|
+
self, config: Mapping[str, Any], **additional_parameters: Mapping[str, Any]
|
56
|
+
) -> dt.datetime:
|
47
57
|
"""
|
48
58
|
Evaluates and returns the datetime
|
49
59
|
:param config: The user-provided configuration as specified by the source's spec
|
@@ -55,7 +65,9 @@ class MinMaxDatetime:
|
|
55
65
|
if not datetime_format:
|
56
66
|
datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
|
57
67
|
|
58
|
-
time = self._parser.parse(
|
68
|
+
time = self._parser.parse(
|
69
|
+
str(self.datetime.eval(config, **additional_parameters)), datetime_format
|
70
|
+
) # type: ignore # datetime is always cast to an interpolated string
|
59
71
|
|
60
72
|
if self.min_datetime:
|
61
73
|
min_time = str(self.min_datetime.eval(config, **additional_parameters)) # type: ignore # min_datetime is always cast to an interpolated string
|
@@ -93,6 +105,8 @@ class MinMaxDatetime:
|
|
93
105
|
if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance(
|
94
106
|
interpolated_string_or_min_max_datetime, str
|
95
107
|
):
|
96
|
-
return MinMaxDatetime(
|
108
|
+
return MinMaxDatetime(
|
109
|
+
datetime=interpolated_string_or_min_max_datetime, parameters=parameters
|
110
|
+
)
|
97
111
|
else:
|
98
112
|
return interpolated_string_or_min_max_datetime
|
@@ -1750,6 +1750,45 @@ definitions:
|
|
1750
1750
|
type:
|
1751
1751
|
type: string
|
1752
1752
|
enum: [XmlDecoder]
|
1753
|
+
CustomDecoder:
|
1754
|
+
title: Custom Decoder
|
1755
|
+
description: Use this to implement custom decoder logic.
|
1756
|
+
type: object
|
1757
|
+
additionalProperties: true
|
1758
|
+
required:
|
1759
|
+
- type
|
1760
|
+
- class_name
|
1761
|
+
properties:
|
1762
|
+
type:
|
1763
|
+
type: string
|
1764
|
+
enum: [CustomDecoder]
|
1765
|
+
class_name:
|
1766
|
+
title: Class Name
|
1767
|
+
description: Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. The format is `source_<name>.<package>.<class_name>`.
|
1768
|
+
type: string
|
1769
|
+
additionalProperties: true
|
1770
|
+
examples:
|
1771
|
+
- "source_amazon_ads.components.GzipJsonlDecoder"
|
1772
|
+
$parameters:
|
1773
|
+
type: object
|
1774
|
+
additionalProperties: true
|
1775
|
+
GzipJsonDecoder:
|
1776
|
+
title: GzipJson Decoder
|
1777
|
+
description: Use this if the response is Gzip compressed Json.
|
1778
|
+
type: object
|
1779
|
+
additionalProperties: true
|
1780
|
+
required:
|
1781
|
+
- type
|
1782
|
+
properties:
|
1783
|
+
type:
|
1784
|
+
type: string
|
1785
|
+
enum: [GzipJsonDecoder]
|
1786
|
+
encoding:
|
1787
|
+
type: string
|
1788
|
+
default: utf-8
|
1789
|
+
$parameters:
|
1790
|
+
type: object
|
1791
|
+
additionalProperties: true
|
1753
1792
|
ListPartitionRouter:
|
1754
1793
|
title: List Partition Router
|
1755
1794
|
description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
|
@@ -2404,10 +2443,12 @@ definitions:
|
|
2404
2443
|
title: Decoder
|
2405
2444
|
description: Component decoding the response so records can be extracted.
|
2406
2445
|
anyOf:
|
2446
|
+
- "$ref": "#/definitions/CustomDecoder"
|
2407
2447
|
- "$ref": "#/definitions/JsonDecoder"
|
2408
2448
|
- "$ref": "#/definitions/JsonlDecoder"
|
2409
2449
|
- "$ref": "#/definitions/IterableDecoder"
|
2410
2450
|
- "$ref": "#/definitions/XmlDecoder"
|
2451
|
+
- "$ref": "#/definitions/GzipJsonDecoder"
|
2411
2452
|
$parameters:
|
2412
2453
|
type: object
|
2413
2454
|
additionalProperties: true
|
@@ -2520,10 +2561,12 @@ definitions:
|
|
2520
2561
|
title: Decoder
|
2521
2562
|
description: Component decoding the response so records can be extracted.
|
2522
2563
|
anyOf:
|
2564
|
+
- "$ref": "#/definitions/CustomDecoder"
|
2523
2565
|
- "$ref": "#/definitions/JsonDecoder"
|
2524
2566
|
- "$ref": "#/definitions/JsonlDecoder"
|
2525
2567
|
- "$ref": "#/definitions/IterableDecoder"
|
2526
2568
|
- "$ref": "#/definitions/XmlDecoder"
|
2569
|
+
- "$ref": "#/definitions/GzipJsonDecoder"
|
2527
2570
|
$parameters:
|
2528
2571
|
type: object
|
2529
2572
|
additionalProperties: true
|
@@ -20,7 +20,9 @@ class DeclarativeSource(AbstractSource):
|
|
20
20
|
def connection_checker(self) -> ConnectionChecker:
|
21
21
|
"""Returns the ConnectionChecker to use for the `check` operation"""
|
22
22
|
|
23
|
-
def check_connection(
|
23
|
+
def check_connection(
|
24
|
+
self, logger: logging.Logger, config: Mapping[str, Any]
|
25
|
+
) -> Tuple[bool, Any]:
|
24
26
|
"""
|
25
27
|
:param logger: The source logger
|
26
28
|
:param config: The user-provided configuration as specified by the source's spec.
|
@@ -6,14 +6,23 @@ from dataclasses import InitVar, dataclass, field
|
|
6
6
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.models import SyncMode
|
9
|
-
from airbyte_cdk.sources.declarative.incremental import
|
9
|
+
from airbyte_cdk.sources.declarative.incremental import (
|
10
|
+
GlobalSubstreamCursor,
|
11
|
+
PerPartitionCursor,
|
12
|
+
PerPartitionWithGlobalCursor,
|
13
|
+
)
|
10
14
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
11
15
|
from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
|
12
16
|
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
13
17
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
14
18
|
from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader
|
15
19
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
-
from airbyte_cdk.sources.streams.checkpoint import
|
20
|
+
from airbyte_cdk.sources.streams.checkpoint import (
|
21
|
+
CheckpointMode,
|
22
|
+
CheckpointReader,
|
23
|
+
Cursor,
|
24
|
+
CursorBasedCheckpointReader,
|
25
|
+
)
|
17
26
|
from airbyte_cdk.sources.streams.core import Stream
|
18
27
|
from airbyte_cdk.sources.types import Config, StreamSlice
|
19
28
|
|
@@ -50,7 +59,11 @@ class DeclarativeStream(Stream):
|
|
50
59
|
if isinstance(self.stream_cursor_field, str)
|
51
60
|
else self.stream_cursor_field
|
52
61
|
)
|
53
|
-
self._schema_loader =
|
62
|
+
self._schema_loader = (
|
63
|
+
self.schema_loader
|
64
|
+
if self.schema_loader
|
65
|
+
else DefaultSchemaLoader(config=self.config, parameters=parameters)
|
66
|
+
)
|
54
67
|
|
55
68
|
@property # type: ignore
|
56
69
|
def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
|
@@ -133,7 +146,9 @@ class DeclarativeStream(Stream):
|
|
133
146
|
# empty slice which seems to make sense.
|
134
147
|
stream_slice = StreamSlice(partition={}, cursor_slice={})
|
135
148
|
if not isinstance(stream_slice, StreamSlice):
|
136
|
-
raise ValueError(
|
149
|
+
raise ValueError(
|
150
|
+
f"DeclarativeStream does not support stream_slices that are not StreamSlice. Got {stream_slice}"
|
151
|
+
)
|
137
152
|
yield from self.retriever.read_records(self.get_json_schema(), stream_slice) # type: ignore # records are of the correct type
|
138
153
|
|
139
154
|
def get_json_schema(self) -> Mapping[str, Any]: # type: ignore
|
@@ -146,7 +161,11 @@ class DeclarativeStream(Stream):
|
|
146
161
|
return self._schema_loader.get_json_schema()
|
147
162
|
|
148
163
|
def stream_slices(
|
149
|
-
self,
|
164
|
+
self,
|
165
|
+
*,
|
166
|
+
sync_mode: SyncMode,
|
167
|
+
cursor_field: Optional[List[str]] = None,
|
168
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
150
169
|
) -> Iterable[Optional[StreamSlice]]:
|
151
170
|
"""
|
152
171
|
Override to define the slices for this stream. See the stream slicing section of the docs for more information.
|
@@ -200,7 +219,9 @@ class DeclarativeStream(Stream):
|
|
200
219
|
cursor = self.get_cursor()
|
201
220
|
checkpoint_mode = self._checkpoint_mode
|
202
221
|
|
203
|
-
if isinstance(
|
222
|
+
if isinstance(
|
223
|
+
cursor, (GlobalSubstreamCursor, PerPartitionCursor, PerPartitionWithGlobalCursor)
|
224
|
+
):
|
204
225
|
self.has_multiple_slices = True
|
205
226
|
return CursorBasedCheckpointReader(
|
206
227
|
stream_slices=mappings_or_slices,
|
@@ -3,9 +3,9 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
6
|
-
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder
|
6
|
+
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder, GzipJsonDecoder
|
7
7
|
from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder
|
8
8
|
from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import PaginationDecoderDecorator
|
9
9
|
from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
|
10
10
|
|
11
|
-
__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"]
|
11
|
+
__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "GzipJsonDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"]
|
@@ -22,7 +22,9 @@ class Decoder:
|
|
22
22
|
"""
|
23
23
|
|
24
24
|
@abstractmethod
|
25
|
-
def decode(
|
25
|
+
def decode(
|
26
|
+
self, response: requests.Response
|
27
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
26
28
|
"""
|
27
29
|
Decodes a requests.Response into a Mapping[str, Any] or an array
|
28
30
|
:param response: the response to decode
|