airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -9,7 +9,21 @@ import importlib
|
|
9
9
|
import inspect
|
10
10
|
import re
|
11
11
|
from functools import partial
|
12
|
-
from typing import
|
12
|
+
from typing import (
|
13
|
+
Any,
|
14
|
+
Callable,
|
15
|
+
Dict,
|
16
|
+
List,
|
17
|
+
Mapping,
|
18
|
+
MutableMapping,
|
19
|
+
Optional,
|
20
|
+
Tuple,
|
21
|
+
Type,
|
22
|
+
Union,
|
23
|
+
get_args,
|
24
|
+
get_origin,
|
25
|
+
get_type_hints,
|
26
|
+
)
|
13
27
|
|
14
28
|
from airbyte_cdk.models import FailureType, Level
|
15
29
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
@@ -18,9 +32,14 @@ from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
|
|
18
32
|
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
19
33
|
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
20
34
|
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
|
21
|
-
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import
|
35
|
+
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
|
36
|
+
DeclarativeAuthenticator,
|
37
|
+
NoAuth,
|
38
|
+
)
|
22
39
|
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
|
23
|
-
from airbyte_cdk.sources.declarative.auth.oauth import
|
40
|
+
from airbyte_cdk.sources.declarative.auth.oauth import (
|
41
|
+
DeclarativeSingleUseRefreshTokenOauth2Authenticator,
|
42
|
+
)
|
24
43
|
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
|
25
44
|
from airbyte_cdk.sources.declarative.auth.token import (
|
26
45
|
ApiKeyAuthenticator,
|
@@ -28,22 +47,36 @@ from airbyte_cdk.sources.declarative.auth.token import (
|
|
28
47
|
BearerAuthenticator,
|
29
48
|
LegacySessionTokenAuthenticator,
|
30
49
|
)
|
31
|
-
from airbyte_cdk.sources.declarative.auth.token_provider import
|
50
|
+
from airbyte_cdk.sources.declarative.auth.token_provider import (
|
51
|
+
InterpolatedStringTokenProvider,
|
52
|
+
SessionTokenProvider,
|
53
|
+
TokenProvider,
|
54
|
+
)
|
32
55
|
from airbyte_cdk.sources.declarative.checks import CheckStream
|
33
56
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
34
57
|
from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
35
58
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
36
59
|
from airbyte_cdk.sources.declarative.decoders import (
|
37
60
|
Decoder,
|
61
|
+
GzipJsonDecoder,
|
38
62
|
IterableDecoder,
|
39
63
|
JsonDecoder,
|
40
64
|
JsonlDecoder,
|
41
65
|
PaginationDecoderDecorator,
|
42
66
|
XmlDecoder,
|
43
67
|
)
|
44
|
-
from airbyte_cdk.sources.declarative.extractors import
|
45
|
-
|
46
|
-
|
68
|
+
from airbyte_cdk.sources.declarative.extractors import (
|
69
|
+
DpathExtractor,
|
70
|
+
RecordFilter,
|
71
|
+
RecordSelector,
|
72
|
+
ResponseToFileExtractor,
|
73
|
+
)
|
74
|
+
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
75
|
+
ClientSideIncrementalRecordFilterDecorator,
|
76
|
+
)
|
77
|
+
from airbyte_cdk.sources.declarative.extractors.record_selector import (
|
78
|
+
SCHEMA_TRANSFORMER_TYPE_MAPPING,
|
79
|
+
)
|
47
80
|
from airbyte_cdk.sources.declarative.incremental import (
|
48
81
|
ChildPartitionResumableFullRefreshCursor,
|
49
82
|
CursorFactory,
|
@@ -56,88 +89,222 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
56
89
|
)
|
57
90
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
58
91
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
59
|
-
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import
|
92
|
+
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
93
|
+
LegacyToPerPartitionStateMigration,
|
94
|
+
)
|
60
95
|
from airbyte_cdk.sources.declarative.models import CustomStateMigration
|
61
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
62
|
-
|
63
|
-
|
64
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
65
|
-
|
66
|
-
|
67
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
68
|
-
|
69
|
-
|
70
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
71
|
-
|
72
|
-
|
73
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
74
|
-
|
75
|
-
|
76
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
77
|
-
|
78
|
-
|
79
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
80
|
-
|
81
|
-
|
82
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
83
|
-
|
84
|
-
|
85
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
86
|
-
|
87
|
-
|
88
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
89
|
-
|
96
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
97
|
+
AddedFieldDefinition as AddedFieldDefinitionModel,
|
98
|
+
)
|
99
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
100
|
+
AddFields as AddFieldsModel,
|
101
|
+
)
|
102
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
103
|
+
ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
|
104
|
+
)
|
105
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
106
|
+
AsyncJobStatusMap as AsyncJobStatusMapModel,
|
107
|
+
)
|
108
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
109
|
+
AsyncRetriever as AsyncRetrieverModel,
|
110
|
+
)
|
111
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
112
|
+
BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
|
113
|
+
)
|
114
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
115
|
+
BearerAuthenticator as BearerAuthenticatorModel,
|
116
|
+
)
|
117
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
118
|
+
CheckStream as CheckStreamModel,
|
119
|
+
)
|
120
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
121
|
+
CompositeErrorHandler as CompositeErrorHandlerModel,
|
122
|
+
)
|
123
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
124
|
+
ConcurrencyLevel as ConcurrencyLevelModel,
|
125
|
+
)
|
126
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
127
|
+
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
128
|
+
)
|
129
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
130
|
+
CursorPagination as CursorPaginationModel,
|
131
|
+
)
|
132
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
133
|
+
CustomAuthenticator as CustomAuthenticatorModel,
|
134
|
+
)
|
135
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
136
|
+
CustomBackoffStrategy as CustomBackoffStrategyModel,
|
137
|
+
)
|
138
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
139
|
+
CustomDecoder as CustomDecoderModel,
|
140
|
+
)
|
141
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
142
|
+
CustomErrorHandler as CustomErrorHandlerModel,
|
143
|
+
)
|
144
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
145
|
+
CustomIncrementalSync as CustomIncrementalSyncModel,
|
146
|
+
)
|
147
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
148
|
+
CustomPaginationStrategy as CustomPaginationStrategyModel,
|
149
|
+
)
|
150
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
151
|
+
CustomPartitionRouter as CustomPartitionRouterModel,
|
152
|
+
)
|
153
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
154
|
+
CustomRecordExtractor as CustomRecordExtractorModel,
|
155
|
+
)
|
156
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
157
|
+
CustomRecordFilter as CustomRecordFilterModel,
|
158
|
+
)
|
159
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
160
|
+
CustomRequester as CustomRequesterModel,
|
161
|
+
)
|
162
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
163
|
+
CustomRetriever as CustomRetrieverModel,
|
164
|
+
)
|
165
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
166
|
+
CustomSchemaLoader as CustomSchemaLoader,
|
167
|
+
)
|
168
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
169
|
+
CustomTransformation as CustomTransformationModel,
|
170
|
+
)
|
171
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
172
|
+
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
173
|
+
)
|
174
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
175
|
+
DeclarativeStream as DeclarativeStreamModel,
|
176
|
+
)
|
177
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
178
|
+
DefaultErrorHandler as DefaultErrorHandlerModel,
|
179
|
+
)
|
180
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
181
|
+
DefaultPaginator as DefaultPaginatorModel,
|
182
|
+
)
|
183
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
184
|
+
DpathExtractor as DpathExtractorModel,
|
185
|
+
)
|
90
186
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
91
187
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
92
188
|
)
|
93
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
94
|
-
|
95
|
-
|
96
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
97
|
-
|
98
|
-
|
99
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
100
|
-
|
101
|
-
|
102
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
103
|
-
|
189
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
190
|
+
GzipJsonDecoder as GzipJsonDecoderModel,
|
191
|
+
)
|
192
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
193
|
+
HttpRequester as HttpRequesterModel,
|
194
|
+
)
|
195
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
196
|
+
HttpResponseFilter as HttpResponseFilterModel,
|
197
|
+
)
|
198
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
199
|
+
InlineSchemaLoader as InlineSchemaLoaderModel,
|
200
|
+
)
|
201
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
202
|
+
IterableDecoder as IterableDecoderModel,
|
203
|
+
)
|
204
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
205
|
+
JsonDecoder as JsonDecoderModel,
|
206
|
+
)
|
207
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
208
|
+
JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
|
209
|
+
)
|
210
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
211
|
+
JsonlDecoder as JsonlDecoderModel,
|
212
|
+
)
|
213
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
214
|
+
JwtAuthenticator as JwtAuthenticatorModel,
|
215
|
+
)
|
216
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
217
|
+
JwtHeaders as JwtHeadersModel,
|
218
|
+
)
|
219
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
220
|
+
JwtPayload as JwtPayloadModel,
|
221
|
+
)
|
222
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
223
|
+
KeysToLower as KeysToLowerModel,
|
224
|
+
)
|
104
225
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
105
226
|
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
106
227
|
)
|
107
228
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
108
229
|
LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
|
109
230
|
)
|
110
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
111
|
-
|
112
|
-
|
113
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
114
|
-
|
115
|
-
|
116
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
117
|
-
|
118
|
-
|
119
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
120
|
-
|
121
|
-
|
122
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
123
|
-
|
124
|
-
|
125
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
231
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
232
|
+
ListPartitionRouter as ListPartitionRouterModel,
|
233
|
+
)
|
234
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
235
|
+
MinMaxDatetime as MinMaxDatetimeModel,
|
236
|
+
)
|
237
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
238
|
+
NoAuth as NoAuthModel,
|
239
|
+
)
|
240
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
241
|
+
NoPagination as NoPaginationModel,
|
242
|
+
)
|
243
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
244
|
+
OAuthAuthenticator as OAuthAuthenticatorModel,
|
245
|
+
)
|
246
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
247
|
+
OffsetIncrement as OffsetIncrementModel,
|
248
|
+
)
|
249
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
250
|
+
PageIncrement as PageIncrementModel,
|
251
|
+
)
|
252
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
253
|
+
ParentStreamConfig as ParentStreamConfigModel,
|
254
|
+
)
|
255
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
256
|
+
RecordFilter as RecordFilterModel,
|
257
|
+
)
|
258
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
259
|
+
RecordSelector as RecordSelectorModel,
|
260
|
+
)
|
261
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
262
|
+
RemoveFields as RemoveFieldsModel,
|
263
|
+
)
|
264
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
265
|
+
RequestOption as RequestOptionModel,
|
266
|
+
)
|
267
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
268
|
+
RequestPath as RequestPathModel,
|
269
|
+
)
|
270
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
271
|
+
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
272
|
+
)
|
273
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
274
|
+
SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
|
275
|
+
)
|
276
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
277
|
+
SimpleRetriever as SimpleRetrieverModel,
|
278
|
+
)
|
126
279
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
127
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
280
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
281
|
+
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
282
|
+
)
|
128
283
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
129
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
130
|
-
|
131
|
-
|
284
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
285
|
+
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
286
|
+
)
|
287
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
288
|
+
WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
|
289
|
+
)
|
290
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
291
|
+
XmlDecoder as XmlDecoderModel,
|
292
|
+
)
|
132
293
|
from airbyte_cdk.sources.declarative.partition_routers import (
|
133
294
|
CartesianProductStreamSlicer,
|
134
295
|
ListPartitionRouter,
|
135
296
|
SinglePartitionRouter,
|
136
297
|
SubstreamPartitionRouter,
|
137
298
|
)
|
138
|
-
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import
|
299
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
300
|
+
ParentStreamConfig,
|
301
|
+
)
|
139
302
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
|
140
|
-
from airbyte_cdk.sources.declarative.requesters.error_handlers import
|
303
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
|
304
|
+
CompositeErrorHandler,
|
305
|
+
DefaultErrorHandler,
|
306
|
+
HttpResponseFilter,
|
307
|
+
)
|
141
308
|
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
|
142
309
|
ConstantBackoffStrategy,
|
143
310
|
ExponentialBackoffStrategy,
|
@@ -145,7 +312,11 @@ from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategie
|
|
145
312
|
WaitUntilTimeFromHeaderBackoffStrategy,
|
146
313
|
)
|
147
314
|
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
|
148
|
-
from airbyte_cdk.sources.declarative.requesters.paginators import
|
315
|
+
from airbyte_cdk.sources.declarative.requesters.paginators import (
|
316
|
+
DefaultPaginator,
|
317
|
+
NoPagination,
|
318
|
+
PaginatorTestReadDecorator,
|
319
|
+
)
|
149
320
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
150
321
|
CursorPaginationStrategy,
|
151
322
|
CursorStopCondition,
|
@@ -162,14 +333,32 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
|
|
162
333
|
)
|
163
334
|
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
164
335
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
165
|
-
from airbyte_cdk.sources.declarative.retrievers import
|
166
|
-
|
336
|
+
from airbyte_cdk.sources.declarative.retrievers import (
|
337
|
+
AsyncRetriever,
|
338
|
+
SimpleRetriever,
|
339
|
+
SimpleRetrieverTestReadDecorator,
|
340
|
+
)
|
341
|
+
from airbyte_cdk.sources.declarative.schema import (
|
342
|
+
DefaultSchemaLoader,
|
343
|
+
InlineSchemaLoader,
|
344
|
+
JsonFileSchemaLoader,
|
345
|
+
)
|
167
346
|
from airbyte_cdk.sources.declarative.spec import Spec
|
168
347
|
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
169
|
-
from airbyte_cdk.sources.declarative.transformations import
|
348
|
+
from airbyte_cdk.sources.declarative.transformations import (
|
349
|
+
AddFields,
|
350
|
+
RecordTransformation,
|
351
|
+
RemoveFields,
|
352
|
+
)
|
170
353
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
171
|
-
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import
|
172
|
-
|
354
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
355
|
+
KeysToLowerTransformation,
|
356
|
+
)
|
357
|
+
from airbyte_cdk.sources.message import (
|
358
|
+
InMemoryMessageRepository,
|
359
|
+
LogAppenderMessageRepositoryDecorator,
|
360
|
+
MessageRepository,
|
361
|
+
)
|
173
362
|
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
174
363
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
175
364
|
CustomFormatConcurrentStreamStateConverter,
|
@@ -185,7 +374,6 @@ ComponentDefinition = Mapping[str, Any]
|
|
185
374
|
|
186
375
|
|
187
376
|
class ModelToComponentFactory:
|
188
|
-
|
189
377
|
EPOCH_DATETIME_FORMAT = "%s"
|
190
378
|
|
191
379
|
def __init__(
|
@@ -221,6 +409,7 @@ class ModelToComponentFactory:
|
|
221
409
|
CursorPaginationModel: self.create_cursor_pagination,
|
222
410
|
CustomAuthenticatorModel: self.create_custom_component,
|
223
411
|
CustomBackoffStrategyModel: self.create_custom_component,
|
412
|
+
CustomDecoderModel: self.create_custom_component,
|
224
413
|
CustomErrorHandlerModel: self.create_custom_component,
|
225
414
|
CustomIncrementalSyncModel: self.create_custom_component,
|
226
415
|
CustomRecordExtractorModel: self.create_custom_component,
|
@@ -244,6 +433,7 @@ class ModelToComponentFactory:
|
|
244
433
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
245
434
|
JsonDecoderModel: self.create_json_decoder,
|
246
435
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
436
|
+
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
247
437
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
248
438
|
IterableDecoderModel: self.create_iterable_decoder,
|
249
439
|
XmlDecoderModel: self.create_xml_decoder,
|
@@ -277,7 +467,11 @@ class ModelToComponentFactory:
|
|
277
467
|
self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
|
278
468
|
|
279
469
|
def create_component(
|
280
|
-
self,
|
470
|
+
self,
|
471
|
+
model_type: Type[BaseModel],
|
472
|
+
component_definition: ComponentDefinition,
|
473
|
+
config: Config,
|
474
|
+
**kwargs: Any,
|
281
475
|
) -> Any:
|
282
476
|
"""
|
283
477
|
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
|
@@ -292,26 +486,38 @@ class ModelToComponentFactory:
|
|
292
486
|
|
293
487
|
component_type = component_definition.get("type")
|
294
488
|
if component_definition.get("type") != model_type.__name__:
|
295
|
-
raise ValueError(
|
489
|
+
raise ValueError(
|
490
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
491
|
+
)
|
296
492
|
|
297
493
|
declarative_component_model = model_type.parse_obj(component_definition)
|
298
494
|
|
299
495
|
if not isinstance(declarative_component_model, model_type):
|
300
|
-
raise ValueError(
|
496
|
+
raise ValueError(
|
497
|
+
f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
|
498
|
+
)
|
301
499
|
|
302
|
-
return self._create_component_from_model(
|
500
|
+
return self._create_component_from_model(
|
501
|
+
model=declarative_component_model, config=config, **kwargs
|
502
|
+
)
|
303
503
|
|
304
504
|
def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
|
305
505
|
if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
|
306
|
-
raise ValueError(
|
506
|
+
raise ValueError(
|
507
|
+
f"{model.__class__} with attributes {model} is not a valid component type"
|
508
|
+
)
|
307
509
|
component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
|
308
510
|
if not component_constructor:
|
309
511
|
raise ValueError(f"Could not find constructor for {model.__class__}")
|
310
512
|
return component_constructor(model=model, config=config, **kwargs)
|
311
513
|
|
312
514
|
@staticmethod
|
313
|
-
def create_added_field_definition(
|
314
|
-
|
515
|
+
def create_added_field_definition(
|
516
|
+
model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
|
517
|
+
) -> AddedFieldDefinition:
|
518
|
+
interpolated_value = InterpolatedString.create(
|
519
|
+
model.value, parameters=model.parameters or {}
|
520
|
+
)
|
315
521
|
return AddedFieldDefinition(
|
316
522
|
path=model.path,
|
317
523
|
value=interpolated_value,
|
@@ -323,14 +529,18 @@ class ModelToComponentFactory:
|
|
323
529
|
added_field_definitions = [
|
324
530
|
self._create_component_from_model(
|
325
531
|
model=added_field_definition_model,
|
326
|
-
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
532
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
533
|
+
added_field_definition_model.value_type
|
534
|
+
),
|
327
535
|
config=config,
|
328
536
|
)
|
329
537
|
for added_field_definition_model in model.fields
|
330
538
|
]
|
331
539
|
return AddFields(fields=added_field_definitions, parameters=model.parameters or {})
|
332
540
|
|
333
|
-
def create_keys_to_lower_transformation(
|
541
|
+
def create_keys_to_lower_transformation(
|
542
|
+
self, model: KeysToLowerModel, config: Config, **kwargs: Any
|
543
|
+
) -> KeysToLowerTransformation:
|
334
544
|
return KeysToLowerTransformation()
|
335
545
|
|
336
546
|
@staticmethod
|
@@ -347,16 +557,25 @@ class ModelToComponentFactory:
|
|
347
557
|
|
348
558
|
@staticmethod
|
349
559
|
def create_api_key_authenticator(
|
350
|
-
model: ApiKeyAuthenticatorModel,
|
560
|
+
model: ApiKeyAuthenticatorModel,
|
561
|
+
config: Config,
|
562
|
+
token_provider: Optional[TokenProvider] = None,
|
563
|
+
**kwargs: Any,
|
351
564
|
) -> ApiKeyAuthenticator:
|
352
565
|
if model.inject_into is None and model.header is None:
|
353
|
-
raise ValueError(
|
566
|
+
raise ValueError(
|
567
|
+
"Expected either inject_into or header to be set for ApiKeyAuthenticator"
|
568
|
+
)
|
354
569
|
|
355
570
|
if model.inject_into is not None and model.header is not None:
|
356
|
-
raise ValueError(
|
571
|
+
raise ValueError(
|
572
|
+
"inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
|
573
|
+
)
|
357
574
|
|
358
575
|
if token_provider is not None and model.api_token != "":
|
359
|
-
raise ValueError(
|
576
|
+
raise ValueError(
|
577
|
+
"If token_provider is set, api_token is ignored and has to be set to empty string."
|
578
|
+
)
|
360
579
|
|
361
580
|
request_option = (
|
362
581
|
RequestOption(
|
@@ -375,7 +594,11 @@ class ModelToComponentFactory:
|
|
375
594
|
token_provider=(
|
376
595
|
token_provider
|
377
596
|
if token_provider is not None
|
378
|
-
else InterpolatedStringTokenProvider(
|
597
|
+
else InterpolatedStringTokenProvider(
|
598
|
+
api_token=model.api_token or "",
|
599
|
+
config=config,
|
600
|
+
parameters=model.parameters or {},
|
601
|
+
)
|
379
602
|
),
|
380
603
|
request_option=request_option,
|
381
604
|
config=config,
|
@@ -394,26 +617,49 @@ class ModelToComponentFactory:
|
|
394
617
|
f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
|
395
618
|
)
|
396
619
|
partition_router = retriever.partition_router
|
397
|
-
if not isinstance(
|
620
|
+
if not isinstance(
|
621
|
+
partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
|
622
|
+
):
|
398
623
|
raise ValueError(
|
399
624
|
f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
|
400
625
|
)
|
401
626
|
if not hasattr(partition_router, "parent_stream_configs"):
|
402
|
-
raise ValueError(
|
627
|
+
raise ValueError(
|
628
|
+
"LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
|
629
|
+
)
|
403
630
|
|
404
|
-
|
631
|
+
if not hasattr(declarative_stream, "incremental_sync"):
|
632
|
+
raise ValueError(
|
633
|
+
"LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
|
634
|
+
)
|
635
|
+
|
636
|
+
return LegacyToPerPartitionStateMigration(
|
637
|
+
partition_router, # type: ignore # was already checked above
|
638
|
+
declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams.
|
639
|
+
config,
|
640
|
+
declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
|
641
|
+
) # type: ignore # The retriever type was already checked
|
405
642
|
|
406
643
|
def create_session_token_authenticator(
|
407
644
|
self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
|
408
645
|
) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
|
409
|
-
decoder =
|
646
|
+
decoder = (
|
647
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
648
|
+
if model.decoder
|
649
|
+
else JsonDecoder(parameters={})
|
650
|
+
)
|
410
651
|
login_requester = self._create_component_from_model(
|
411
|
-
model=model.login_requester,
|
652
|
+
model=model.login_requester,
|
653
|
+
config=config,
|
654
|
+
name=f"{name}_login_requester",
|
655
|
+
decoder=decoder,
|
412
656
|
)
|
413
657
|
token_provider = SessionTokenProvider(
|
414
658
|
login_requester=login_requester,
|
415
659
|
session_token_path=model.session_token_path,
|
416
|
-
expiration_duration=parse_duration(model.expiration_duration)
|
660
|
+
expiration_duration=parse_duration(model.expiration_duration)
|
661
|
+
if model.expiration_duration
|
662
|
+
else None,
|
417
663
|
parameters=model.parameters or {},
|
418
664
|
message_repository=self._message_repository,
|
419
665
|
decoder=decoder,
|
@@ -426,28 +672,46 @@ class ModelToComponentFactory:
|
|
426
672
|
)
|
427
673
|
else:
|
428
674
|
return ModelToComponentFactory.create_api_key_authenticator(
|
429
|
-
ApiKeyAuthenticatorModel(
|
675
|
+
ApiKeyAuthenticatorModel(
|
676
|
+
type="ApiKeyAuthenticator",
|
677
|
+
api_token="",
|
678
|
+
inject_into=model.request_authentication.inject_into,
|
679
|
+
), # type: ignore # $parameters and headers default to None
|
430
680
|
config=config,
|
431
681
|
token_provider=token_provider,
|
432
682
|
)
|
433
683
|
|
434
684
|
@staticmethod
|
435
|
-
def create_basic_http_authenticator(
|
685
|
+
def create_basic_http_authenticator(
|
686
|
+
model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
|
687
|
+
) -> BasicHttpAuthenticator:
|
436
688
|
return BasicHttpAuthenticator(
|
437
|
-
password=model.password or "",
|
689
|
+
password=model.password or "",
|
690
|
+
username=model.username,
|
691
|
+
config=config,
|
692
|
+
parameters=model.parameters or {},
|
438
693
|
)
|
439
694
|
|
440
695
|
@staticmethod
|
441
696
|
def create_bearer_authenticator(
|
442
|
-
model: BearerAuthenticatorModel,
|
697
|
+
model: BearerAuthenticatorModel,
|
698
|
+
config: Config,
|
699
|
+
token_provider: Optional[TokenProvider] = None,
|
700
|
+
**kwargs: Any,
|
443
701
|
) -> BearerAuthenticator:
|
444
702
|
if token_provider is not None and model.api_token != "":
|
445
|
-
raise ValueError(
|
703
|
+
raise ValueError(
|
704
|
+
"If token_provider is set, api_token is ignored and has to be set to empty string."
|
705
|
+
)
|
446
706
|
return BearerAuthenticator(
|
447
707
|
token_provider=(
|
448
708
|
token_provider
|
449
709
|
if token_provider is not None
|
450
|
-
else InterpolatedStringTokenProvider(
|
710
|
+
else InterpolatedStringTokenProvider(
|
711
|
+
api_token=model.api_token or "",
|
712
|
+
config=config,
|
713
|
+
parameters=model.parameters or {},
|
714
|
+
)
|
451
715
|
),
|
452
716
|
config=config,
|
453
717
|
parameters=model.parameters or {},
|
@@ -457,14 +721,21 @@ class ModelToComponentFactory:
|
|
457
721
|
def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
|
458
722
|
return CheckStream(stream_names=model.stream_names, parameters={})
|
459
723
|
|
460
|
-
def create_composite_error_handler(
|
724
|
+
def create_composite_error_handler(
|
725
|
+
self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
|
726
|
+
) -> CompositeErrorHandler:
|
461
727
|
error_handlers = [
|
462
|
-
self._create_component_from_model(model=error_handler_model, config=config)
|
728
|
+
self._create_component_from_model(model=error_handler_model, config=config)
|
729
|
+
for error_handler_model in model.error_handlers
|
463
730
|
]
|
464
|
-
return CompositeErrorHandler(
|
731
|
+
return CompositeErrorHandler(
|
732
|
+
error_handlers=error_handlers, parameters=model.parameters or {}
|
733
|
+
)
|
465
734
|
|
466
735
|
@staticmethod
|
467
|
-
def create_concurrency_level(
|
736
|
+
def create_concurrency_level(
|
737
|
+
model: ConcurrencyLevelModel, config: Config, **kwargs: Any
|
738
|
+
) -> ConcurrencyLevel:
|
468
739
|
return ConcurrencyLevel(
|
469
740
|
default_concurrency=model.default_concurrency,
|
470
741
|
max_concurrency=model.max_concurrency,
|
@@ -483,26 +754,32 @@ class ModelToComponentFactory:
|
|
483
754
|
stream_state: MutableMapping[str, Any],
|
484
755
|
**kwargs: Any,
|
485
756
|
) -> Tuple[ConcurrentCursor, DateTimeStreamStateConverter]:
|
486
|
-
|
487
757
|
component_type = component_definition.get("type")
|
488
758
|
if component_definition.get("type") != model_type.__name__:
|
489
|
-
raise ValueError(
|
759
|
+
raise ValueError(
|
760
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
761
|
+
)
|
490
762
|
|
491
763
|
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
492
764
|
|
493
765
|
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
494
|
-
raise ValueError(
|
766
|
+
raise ValueError(
|
767
|
+
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
768
|
+
)
|
495
769
|
|
496
770
|
interpolated_cursor_field = InterpolatedString.create(
|
497
|
-
datetime_based_cursor_model.cursor_field,
|
771
|
+
datetime_based_cursor_model.cursor_field,
|
772
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
498
773
|
)
|
499
774
|
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
500
775
|
|
501
776
|
interpolated_partition_field_start = InterpolatedString.create(
|
502
|
-
datetime_based_cursor_model.partition_field_start or "start_time",
|
777
|
+
datetime_based_cursor_model.partition_field_start or "start_time",
|
778
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
503
779
|
)
|
504
780
|
interpolated_partition_field_end = InterpolatedString.create(
|
505
|
-
datetime_based_cursor_model.partition_field_end or "end_time",
|
781
|
+
datetime_based_cursor_model.partition_field_end or "end_time",
|
782
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
506
783
|
)
|
507
784
|
|
508
785
|
slice_boundary_fields = (
|
@@ -513,12 +790,17 @@ class ModelToComponentFactory:
|
|
513
790
|
datetime_format = datetime_based_cursor_model.datetime_format
|
514
791
|
|
515
792
|
cursor_granularity = (
|
516
|
-
parse_duration(datetime_based_cursor_model.cursor_granularity)
|
793
|
+
parse_duration(datetime_based_cursor_model.cursor_granularity)
|
794
|
+
if datetime_based_cursor_model.cursor_granularity
|
795
|
+
else None
|
517
796
|
)
|
518
797
|
|
519
798
|
lookback_window = None
|
520
799
|
interpolated_lookback_window = (
|
521
|
-
InterpolatedString.create(
|
800
|
+
InterpolatedString.create(
|
801
|
+
datetime_based_cursor_model.lookback_window,
|
802
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
803
|
+
)
|
522
804
|
if datetime_based_cursor_model.lookback_window
|
523
805
|
else None
|
524
806
|
)
|
@@ -538,21 +820,30 @@ class ModelToComponentFactory:
|
|
538
820
|
|
539
821
|
start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
|
540
822
|
if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
|
541
|
-
start_date_runtime_value = self.create_min_max_datetime(
|
823
|
+
start_date_runtime_value = self.create_min_max_datetime(
|
824
|
+
model=datetime_based_cursor_model.start_datetime, config=config
|
825
|
+
)
|
542
826
|
else:
|
543
827
|
start_date_runtime_value = datetime_based_cursor_model.start_datetime
|
544
828
|
|
545
829
|
end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
|
546
830
|
if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
|
547
|
-
end_date_runtime_value = self.create_min_max_datetime(
|
831
|
+
end_date_runtime_value = self.create_min_max_datetime(
|
832
|
+
model=datetime_based_cursor_model.end_datetime, config=config
|
833
|
+
)
|
548
834
|
else:
|
549
835
|
end_date_runtime_value = datetime_based_cursor_model.end_datetime
|
550
836
|
|
551
837
|
interpolated_start_date = MinMaxDatetime.create(
|
552
|
-
interpolated_string_or_min_max_datetime=start_date_runtime_value,
|
838
|
+
interpolated_string_or_min_max_datetime=start_date_runtime_value,
|
839
|
+
parameters=datetime_based_cursor_model.parameters,
|
553
840
|
)
|
554
841
|
interpolated_end_date = (
|
555
|
-
None
|
842
|
+
None
|
843
|
+
if not end_date_runtime_value
|
844
|
+
else MinMaxDatetime.create(
|
845
|
+
end_date_runtime_value, datetime_based_cursor_model.parameters
|
846
|
+
)
|
556
847
|
)
|
557
848
|
|
558
849
|
# If datetime format is not specified then start/end datetime should inherit it from the stream slicer
|
@@ -563,10 +854,14 @@ class ModelToComponentFactory:
|
|
563
854
|
|
564
855
|
start_date = interpolated_start_date.get_datetime(config=config)
|
565
856
|
end_date_provider = (
|
566
|
-
partial(interpolated_end_date.get_datetime, config)
|
857
|
+
partial(interpolated_end_date.get_datetime, config)
|
858
|
+
if interpolated_end_date
|
859
|
+
else connector_state_converter.get_end_provider()
|
567
860
|
)
|
568
861
|
|
569
|
-
if (
|
862
|
+
if (
|
863
|
+
datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
|
864
|
+
) or (
|
570
865
|
not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
|
571
866
|
):
|
572
867
|
raise ValueError(
|
@@ -577,7 +872,10 @@ class ModelToComponentFactory:
|
|
577
872
|
# When step is not defined, default to a step size from the starting date to the present moment
|
578
873
|
step_length = datetime.timedelta.max
|
579
874
|
interpolated_step = (
|
580
|
-
InterpolatedString.create(
|
875
|
+
InterpolatedString.create(
|
876
|
+
datetime_based_cursor_model.step,
|
877
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
878
|
+
)
|
581
879
|
if datetime_based_cursor_model.step
|
582
880
|
else None
|
583
881
|
)
|
@@ -606,7 +904,9 @@ class ModelToComponentFactory:
|
|
606
904
|
)
|
607
905
|
|
608
906
|
@staticmethod
|
609
|
-
def create_constant_backoff_strategy(
|
907
|
+
def create_constant_backoff_strategy(
|
908
|
+
model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
|
909
|
+
) -> ConstantBackoffStrategy:
|
610
910
|
return ConstantBackoffStrategy(
|
611
911
|
backoff_time_in_seconds=model.backoff_time_in_seconds,
|
612
912
|
config=config,
|
@@ -624,7 +924,9 @@ class ModelToComponentFactory:
|
|
624
924
|
decoder_to_use = decoder
|
625
925
|
else:
|
626
926
|
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
627
|
-
raise ValueError(
|
927
|
+
raise ValueError(
|
928
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
929
|
+
)
|
628
930
|
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
629
931
|
|
630
932
|
return CursorPaginationStrategy(
|
@@ -660,18 +962,28 @@ class ModelToComponentFactory:
|
|
660
962
|
# the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
|
661
963
|
for model_field, model_value in model_args.items():
|
662
964
|
# If a custom component field doesn't have a type set, we try to use the type hints to infer the type
|
663
|
-
if
|
664
|
-
|
965
|
+
if (
|
966
|
+
isinstance(model_value, dict)
|
967
|
+
and "type" not in model_value
|
968
|
+
and model_field in component_fields
|
969
|
+
):
|
970
|
+
derived_type = self._derive_component_type_from_type_hints(
|
971
|
+
component_fields.get(model_field)
|
972
|
+
)
|
665
973
|
if derived_type:
|
666
974
|
model_value["type"] = derived_type
|
667
975
|
|
668
976
|
if self._is_component(model_value):
|
669
|
-
model_args[model_field] = self._create_nested_component(
|
977
|
+
model_args[model_field] = self._create_nested_component(
|
978
|
+
model, model_field, model_value, config
|
979
|
+
)
|
670
980
|
elif isinstance(model_value, list):
|
671
981
|
vals = []
|
672
982
|
for v in model_value:
|
673
983
|
if isinstance(v, dict) and "type" not in v and model_field in component_fields:
|
674
|
-
derived_type = self._derive_component_type_from_type_hints(
|
984
|
+
derived_type = self._derive_component_type_from_type_hints(
|
985
|
+
component_fields.get(model_field)
|
986
|
+
)
|
675
987
|
if derived_type:
|
676
988
|
v["type"] = derived_type
|
677
989
|
if self._is_component(v):
|
@@ -680,7 +992,11 @@ class ModelToComponentFactory:
|
|
680
992
|
vals.append(v)
|
681
993
|
model_args[model_field] = vals
|
682
994
|
|
683
|
-
kwargs = {
|
995
|
+
kwargs = {
|
996
|
+
class_field: model_args[class_field]
|
997
|
+
for class_field in component_fields.keys()
|
998
|
+
if class_field in model_args
|
999
|
+
}
|
684
1000
|
return custom_component_class(**kwargs)
|
685
1001
|
|
686
1002
|
@staticmethod
|
@@ -724,7 +1040,9 @@ class ModelToComponentFactory:
|
|
724
1040
|
else:
|
725
1041
|
return []
|
726
1042
|
|
727
|
-
def _create_nested_component(
|
1043
|
+
def _create_nested_component(
|
1044
|
+
self, model: Any, model_field: str, model_value: Any, config: Config
|
1045
|
+
) -> Any:
|
728
1046
|
type_name = model_value.get("type", None)
|
729
1047
|
if not type_name:
|
730
1048
|
# If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
|
@@ -743,16 +1061,29 @@ class ModelToComponentFactory:
|
|
743
1061
|
model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
|
744
1062
|
constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
|
745
1063
|
model_parameters = model_value.get("$parameters", {})
|
746
|
-
matching_parameters = {
|
747
|
-
|
1064
|
+
matching_parameters = {
|
1065
|
+
kwarg: model_parameters[kwarg]
|
1066
|
+
for kwarg in constructor_kwargs
|
1067
|
+
if kwarg in model_parameters
|
1068
|
+
}
|
1069
|
+
return self._create_component_from_model(
|
1070
|
+
model=parsed_model, config=config, **matching_parameters
|
1071
|
+
)
|
748
1072
|
except TypeError as error:
|
749
1073
|
missing_parameters = self._extract_missing_parameters(error)
|
750
1074
|
if missing_parameters:
|
751
1075
|
raise ValueError(
|
752
1076
|
f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
|
753
|
-
+ ", ".join(
|
1077
|
+
+ ", ".join(
|
1078
|
+
(
|
1079
|
+
f"{type_name}.$parameters.{parameter}"
|
1080
|
+
for parameter in missing_parameters
|
1081
|
+
)
|
1082
|
+
)
|
754
1083
|
)
|
755
|
-
raise TypeError(
|
1084
|
+
raise TypeError(
|
1085
|
+
f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
|
1086
|
+
)
|
756
1087
|
else:
|
757
1088
|
raise ValueError(
|
758
1089
|
f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
|
@@ -762,18 +1093,26 @@ class ModelToComponentFactory:
|
|
762
1093
|
def _is_component(model_value: Any) -> bool:
|
763
1094
|
return isinstance(model_value, dict) and model_value.get("type") is not None
|
764
1095
|
|
765
|
-
def create_datetime_based_cursor(
|
1096
|
+
def create_datetime_based_cursor(
|
1097
|
+
self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
|
1098
|
+
) -> DatetimeBasedCursor:
|
766
1099
|
start_datetime: Union[str, MinMaxDatetime] = (
|
767
|
-
model.start_datetime
|
1100
|
+
model.start_datetime
|
1101
|
+
if isinstance(model.start_datetime, str)
|
1102
|
+
else self.create_min_max_datetime(model.start_datetime, config)
|
768
1103
|
)
|
769
1104
|
end_datetime: Union[str, MinMaxDatetime, None] = None
|
770
1105
|
if model.is_data_feed and model.end_datetime:
|
771
1106
|
raise ValueError("Data feed does not support end_datetime")
|
772
1107
|
if model.is_data_feed and model.is_client_side_incremental:
|
773
|
-
raise ValueError(
|
1108
|
+
raise ValueError(
|
1109
|
+
"`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
|
1110
|
+
)
|
774
1111
|
if model.end_datetime:
|
775
1112
|
end_datetime = (
|
776
|
-
model.end_datetime
|
1113
|
+
model.end_datetime
|
1114
|
+
if isinstance(model.end_datetime, str)
|
1115
|
+
else self.create_min_max_datetime(model.end_datetime, config)
|
777
1116
|
)
|
778
1117
|
|
779
1118
|
end_time_option = (
|
@@ -797,7 +1136,9 @@ class ModelToComponentFactory:
|
|
797
1136
|
|
798
1137
|
return DatetimeBasedCursor(
|
799
1138
|
cursor_field=model.cursor_field,
|
800
|
-
cursor_datetime_formats=model.cursor_datetime_formats
|
1139
|
+
cursor_datetime_formats=model.cursor_datetime_formats
|
1140
|
+
if model.cursor_datetime_formats
|
1141
|
+
else [],
|
801
1142
|
cursor_granularity=model.cursor_granularity,
|
802
1143
|
datetime_format=model.datetime_format,
|
803
1144
|
end_datetime=end_datetime,
|
@@ -814,7 +1155,9 @@ class ModelToComponentFactory:
|
|
814
1155
|
parameters=model.parameters or {},
|
815
1156
|
)
|
816
1157
|
|
817
|
-
def create_declarative_stream(
|
1158
|
+
def create_declarative_stream(
|
1159
|
+
self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
|
1160
|
+
) -> DeclarativeStream:
|
818
1161
|
# When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
|
819
1162
|
# components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
|
820
1163
|
# Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
|
@@ -823,7 +1166,9 @@ class ModelToComponentFactory:
|
|
823
1166
|
|
824
1167
|
primary_key = model.primary_key.__root__ if model.primary_key else None
|
825
1168
|
stop_condition_on_cursor = (
|
826
|
-
model.incremental_sync
|
1169
|
+
model.incremental_sync
|
1170
|
+
and hasattr(model.incremental_sync, "is_data_feed")
|
1171
|
+
and model.incremental_sync.is_data_feed
|
827
1172
|
)
|
828
1173
|
client_side_incremental_sync = None
|
829
1174
|
if (
|
@@ -831,13 +1176,25 @@ class ModelToComponentFactory:
|
|
831
1176
|
and hasattr(model.incremental_sync, "is_client_side_incremental")
|
832
1177
|
and model.incremental_sync.is_client_side_incremental
|
833
1178
|
):
|
834
|
-
supported_slicers = (
|
1179
|
+
supported_slicers = (
|
1180
|
+
DatetimeBasedCursor,
|
1181
|
+
GlobalSubstreamCursor,
|
1182
|
+
PerPartitionWithGlobalCursor,
|
1183
|
+
)
|
835
1184
|
if combined_slicers and not isinstance(combined_slicers, supported_slicers):
|
836
|
-
raise ValueError(
|
1185
|
+
raise ValueError(
|
1186
|
+
"Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
|
1187
|
+
)
|
837
1188
|
client_side_incremental_sync = {
|
838
|
-
"date_time_based_cursor": self._create_component_from_model(
|
1189
|
+
"date_time_based_cursor": self._create_component_from_model(
|
1190
|
+
model=model.incremental_sync, config=config
|
1191
|
+
),
|
839
1192
|
"substream_cursor": (
|
840
|
-
combined_slicers
|
1193
|
+
combined_slicers
|
1194
|
+
if isinstance(
|
1195
|
+
combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
|
1196
|
+
)
|
1197
|
+
else None
|
841
1198
|
),
|
842
1199
|
}
|
843
1200
|
|
@@ -877,7 +1234,9 @@ class ModelToComponentFactory:
|
|
877
1234
|
transformations = []
|
878
1235
|
if model.transformations:
|
879
1236
|
for transformation_model in model.transformations:
|
880
|
-
transformations.append(
|
1237
|
+
transformations.append(
|
1238
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1239
|
+
)
|
881
1240
|
retriever = self._create_component_from_model(
|
882
1241
|
model=model.retriever,
|
883
1242
|
config=config,
|
@@ -900,7 +1259,9 @@ class ModelToComponentFactory:
|
|
900
1259
|
state_transformations = []
|
901
1260
|
|
902
1261
|
if model.schema_loader:
|
903
|
-
schema_loader = self._create_component_from_model(
|
1262
|
+
schema_loader = self._create_component_from_model(
|
1263
|
+
model=model.schema_loader, config=config
|
1264
|
+
)
|
904
1265
|
else:
|
905
1266
|
options = model.parameters or {}
|
906
1267
|
if "name" not in options:
|
@@ -918,7 +1279,9 @@ class ModelToComponentFactory:
|
|
918
1279
|
parameters=model.parameters or {},
|
919
1280
|
)
|
920
1281
|
|
921
|
-
def _merge_stream_slicers(
|
1282
|
+
def _merge_stream_slicers(
|
1283
|
+
self, model: DeclarativeStreamModel, config: Config
|
1284
|
+
) -> Optional[StreamSlicer]:
|
922
1285
|
stream_slicer = None
|
923
1286
|
if (
|
924
1287
|
hasattr(model.retriever, "partition_router")
|
@@ -929,50 +1292,85 @@ class ModelToComponentFactory:
|
|
929
1292
|
|
930
1293
|
if isinstance(stream_slicer_model, list):
|
931
1294
|
stream_slicer = CartesianProductStreamSlicer(
|
932
|
-
[
|
1295
|
+
[
|
1296
|
+
self._create_component_from_model(model=slicer, config=config)
|
1297
|
+
for slicer in stream_slicer_model
|
1298
|
+
],
|
1299
|
+
parameters={},
|
933
1300
|
)
|
934
1301
|
else:
|
935
|
-
stream_slicer = self._create_component_from_model(
|
1302
|
+
stream_slicer = self._create_component_from_model(
|
1303
|
+
model=stream_slicer_model, config=config
|
1304
|
+
)
|
936
1305
|
|
937
1306
|
if model.incremental_sync and stream_slicer:
|
938
1307
|
incremental_sync_model = model.incremental_sync
|
939
|
-
if
|
940
|
-
|
941
|
-
|
1308
|
+
if (
|
1309
|
+
hasattr(incremental_sync_model, "global_substream_cursor")
|
1310
|
+
and incremental_sync_model.global_substream_cursor
|
1311
|
+
):
|
1312
|
+
cursor_component = self._create_component_from_model(
|
1313
|
+
model=incremental_sync_model, config=config
|
1314
|
+
)
|
1315
|
+
return GlobalSubstreamCursor(
|
1316
|
+
stream_cursor=cursor_component, partition_router=stream_slicer
|
1317
|
+
)
|
942
1318
|
else:
|
943
|
-
cursor_component = self._create_component_from_model(
|
1319
|
+
cursor_component = self._create_component_from_model(
|
1320
|
+
model=incremental_sync_model, config=config
|
1321
|
+
)
|
944
1322
|
return PerPartitionWithGlobalCursor(
|
945
1323
|
cursor_factory=CursorFactory(
|
946
|
-
lambda: self._create_component_from_model(
|
1324
|
+
lambda: self._create_component_from_model(
|
1325
|
+
model=incremental_sync_model, config=config
|
1326
|
+
),
|
947
1327
|
),
|
948
1328
|
partition_router=stream_slicer,
|
949
1329
|
stream_cursor=cursor_component,
|
950
1330
|
)
|
951
1331
|
elif model.incremental_sync:
|
952
|
-
return
|
1332
|
+
return (
|
1333
|
+
self._create_component_from_model(model=model.incremental_sync, config=config)
|
1334
|
+
if model.incremental_sync
|
1335
|
+
else None
|
1336
|
+
)
|
953
1337
|
elif stream_slicer:
|
954
1338
|
# For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
|
955
1339
|
return PerPartitionCursor(
|
956
|
-
cursor_factory=CursorFactory(
|
1340
|
+
cursor_factory=CursorFactory(
|
1341
|
+
create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
|
1342
|
+
),
|
957
1343
|
partition_router=stream_slicer,
|
958
1344
|
)
|
959
|
-
elif
|
1345
|
+
elif (
|
1346
|
+
hasattr(model.retriever, "paginator")
|
1347
|
+
and model.retriever.paginator
|
1348
|
+
and not stream_slicer
|
1349
|
+
):
|
960
1350
|
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
961
1351
|
return ResumableFullRefreshCursor(parameters={})
|
962
1352
|
else:
|
963
1353
|
return None
|
964
1354
|
|
965
|
-
def create_default_error_handler(
|
1355
|
+
def create_default_error_handler(
|
1356
|
+
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
1357
|
+
) -> DefaultErrorHandler:
|
966
1358
|
backoff_strategies = []
|
967
1359
|
if model.backoff_strategies:
|
968
1360
|
for backoff_strategy_model in model.backoff_strategies:
|
969
|
-
backoff_strategies.append(
|
1361
|
+
backoff_strategies.append(
|
1362
|
+
self._create_component_from_model(model=backoff_strategy_model, config=config)
|
1363
|
+
)
|
970
1364
|
|
971
1365
|
response_filters = []
|
972
1366
|
if model.response_filters:
|
973
1367
|
for response_filter_model in model.response_filters:
|
974
|
-
response_filters.append(
|
975
|
-
|
1368
|
+
response_filters.append(
|
1369
|
+
self._create_component_from_model(model=response_filter_model, config=config)
|
1370
|
+
)
|
1371
|
+
response_filters.append(
|
1372
|
+
HttpResponseFilter(config=config, parameters=model.parameters or {})
|
1373
|
+
)
|
976
1374
|
|
977
1375
|
return DefaultErrorHandler(
|
978
1376
|
backoff_strategies=backoff_strategies,
|
@@ -993,17 +1391,25 @@ class ModelToComponentFactory:
|
|
993
1391
|
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
|
994
1392
|
if decoder:
|
995
1393
|
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
996
|
-
raise ValueError(
|
1394
|
+
raise ValueError(
|
1395
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
1396
|
+
)
|
997
1397
|
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
998
1398
|
else:
|
999
1399
|
decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
|
1000
1400
|
page_size_option = (
|
1001
|
-
self._create_component_from_model(model=model.page_size_option, config=config)
|
1401
|
+
self._create_component_from_model(model=model.page_size_option, config=config)
|
1402
|
+
if model.page_size_option
|
1403
|
+
else None
|
1002
1404
|
)
|
1003
1405
|
page_token_option = (
|
1004
|
-
self._create_component_from_model(model=model.page_token_option, config=config)
|
1406
|
+
self._create_component_from_model(model=model.page_token_option, config=config)
|
1407
|
+
if model.page_token_option
|
1408
|
+
else None
|
1409
|
+
)
|
1410
|
+
pagination_strategy = self._create_component_from_model(
|
1411
|
+
model=model.pagination_strategy, config=config, decoder=decoder_to_use
|
1005
1412
|
)
|
1006
|
-
pagination_strategy = self._create_component_from_model(model=model.pagination_strategy, config=config, decoder=decoder_to_use)
|
1007
1413
|
if cursor_used_for_stop_condition:
|
1008
1414
|
pagination_strategy = StopConditionPaginationStrategyDecorator(
|
1009
1415
|
pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
|
@@ -1022,29 +1428,55 @@ class ModelToComponentFactory:
|
|
1022
1428
|
return paginator
|
1023
1429
|
|
1024
1430
|
def create_dpath_extractor(
|
1025
|
-
self,
|
1431
|
+
self,
|
1432
|
+
model: DpathExtractorModel,
|
1433
|
+
config: Config,
|
1434
|
+
decoder: Optional[Decoder] = None,
|
1435
|
+
**kwargs: Any,
|
1026
1436
|
) -> DpathExtractor:
|
1027
1437
|
if decoder:
|
1028
1438
|
decoder_to_use = decoder
|
1029
1439
|
else:
|
1030
1440
|
decoder_to_use = JsonDecoder(parameters={})
|
1031
1441
|
model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
|
1032
|
-
return DpathExtractor(
|
1442
|
+
return DpathExtractor(
|
1443
|
+
decoder=decoder_to_use,
|
1444
|
+
field_path=model_field_path,
|
1445
|
+
config=config,
|
1446
|
+
parameters=model.parameters or {},
|
1447
|
+
)
|
1033
1448
|
|
1034
1449
|
@staticmethod
|
1035
|
-
def create_exponential_backoff_strategy(
|
1036
|
-
|
1450
|
+
def create_exponential_backoff_strategy(
|
1451
|
+
model: ExponentialBackoffStrategyModel, config: Config
|
1452
|
+
) -> ExponentialBackoffStrategy:
|
1453
|
+
return ExponentialBackoffStrategy(
|
1454
|
+
factor=model.factor or 5, parameters=model.parameters or {}, config=config
|
1455
|
+
)
|
1037
1456
|
|
1038
|
-
def create_http_requester(
|
1457
|
+
def create_http_requester(
|
1458
|
+
self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str
|
1459
|
+
) -> HttpRequester:
|
1039
1460
|
authenticator = (
|
1040
|
-
self._create_component_from_model(
|
1461
|
+
self._create_component_from_model(
|
1462
|
+
model=model.authenticator,
|
1463
|
+
config=config,
|
1464
|
+
url_base=model.url_base,
|
1465
|
+
name=name,
|
1466
|
+
decoder=decoder,
|
1467
|
+
)
|
1041
1468
|
if model.authenticator
|
1042
1469
|
else None
|
1043
1470
|
)
|
1044
1471
|
error_handler = (
|
1045
1472
|
self._create_component_from_model(model=model.error_handler, config=config)
|
1046
1473
|
if model.error_handler
|
1047
|
-
else DefaultErrorHandler(
|
1474
|
+
else DefaultErrorHandler(
|
1475
|
+
backoff_strategies=[],
|
1476
|
+
response_filters=[],
|
1477
|
+
config=config,
|
1478
|
+
parameters=model.parameters or {},
|
1479
|
+
)
|
1048
1480
|
)
|
1049
1481
|
|
1050
1482
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
@@ -1079,7 +1511,9 @@ class ModelToComponentFactory:
|
|
1079
1511
|
)
|
1080
1512
|
|
1081
1513
|
@staticmethod
|
1082
|
-
def create_http_response_filter(
|
1514
|
+
def create_http_response_filter(
|
1515
|
+
model: HttpResponseFilterModel, config: Config, **kwargs: Any
|
1516
|
+
) -> HttpResponseFilter:
|
1083
1517
|
if model.action:
|
1084
1518
|
action = ResponseAction(model.action.value)
|
1085
1519
|
else:
|
@@ -1103,7 +1537,9 @@ class ModelToComponentFactory:
|
|
1103
1537
|
)
|
1104
1538
|
|
1105
1539
|
@staticmethod
|
1106
|
-
def create_inline_schema_loader(
|
1540
|
+
def create_inline_schema_loader(
|
1541
|
+
model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
|
1542
|
+
) -> InlineSchemaLoader:
|
1107
1543
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1108
1544
|
|
1109
1545
|
@staticmethod
|
@@ -1111,11 +1547,15 @@ class ModelToComponentFactory:
|
|
1111
1547
|
return JsonDecoder(parameters={})
|
1112
1548
|
|
1113
1549
|
@staticmethod
|
1114
|
-
def create_jsonl_decoder(
|
1550
|
+
def create_jsonl_decoder(
|
1551
|
+
model: JsonlDecoderModel, config: Config, **kwargs: Any
|
1552
|
+
) -> JsonlDecoder:
|
1115
1553
|
return JsonlDecoder(parameters={})
|
1116
1554
|
|
1117
1555
|
@staticmethod
|
1118
|
-
def create_iterable_decoder(
|
1556
|
+
def create_iterable_decoder(
|
1557
|
+
model: IterableDecoderModel, config: Config, **kwargs: Any
|
1558
|
+
) -> IterableDecoder:
|
1119
1559
|
return IterableDecoder(parameters={})
|
1120
1560
|
|
1121
1561
|
@staticmethod
|
@@ -1123,11 +1563,23 @@ class ModelToComponentFactory:
|
|
1123
1563
|
return XmlDecoder(parameters={})
|
1124
1564
|
|
1125
1565
|
@staticmethod
|
1126
|
-
def
|
1127
|
-
|
1566
|
+
def create_gzipjson_decoder(
|
1567
|
+
model: GzipJsonDecoderModel, config: Config, **kwargs: Any
|
1568
|
+
) -> GzipJsonDecoder:
|
1569
|
+
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
1570
|
+
|
1571
|
+
@staticmethod
|
1572
|
+
def create_json_file_schema_loader(
|
1573
|
+
model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
|
1574
|
+
) -> JsonFileSchemaLoader:
|
1575
|
+
return JsonFileSchemaLoader(
|
1576
|
+
file_path=model.file_path or "", config=config, parameters=model.parameters or {}
|
1577
|
+
)
|
1128
1578
|
|
1129
1579
|
@staticmethod
|
1130
|
-
def create_jwt_authenticator(
|
1580
|
+
def create_jwt_authenticator(
|
1581
|
+
model: JwtAuthenticatorModel, config: Config, **kwargs: Any
|
1582
|
+
) -> JwtAuthenticator:
|
1131
1583
|
jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
|
1132
1584
|
jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
|
1133
1585
|
return JwtAuthenticator(
|
@@ -1149,7 +1601,9 @@ class ModelToComponentFactory:
|
|
1149
1601
|
)
|
1150
1602
|
|
1151
1603
|
@staticmethod
|
1152
|
-
def create_list_partition_router(
|
1604
|
+
def create_list_partition_router(
|
1605
|
+
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
1606
|
+
) -> ListPartitionRouter:
|
1153
1607
|
request_option = (
|
1154
1608
|
RequestOption(
|
1155
1609
|
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
@@ -1168,7 +1622,9 @@ class ModelToComponentFactory:
|
|
1168
1622
|
)
|
1169
1623
|
|
1170
1624
|
@staticmethod
|
1171
|
-
def create_min_max_datetime(
|
1625
|
+
def create_min_max_datetime(
|
1626
|
+
model: MinMaxDatetimeModel, config: Config, **kwargs: Any
|
1627
|
+
) -> MinMaxDatetime:
|
1172
1628
|
return MinMaxDatetime(
|
1173
1629
|
datetime=model.datetime,
|
1174
1630
|
datetime_format=model.datetime_format or "",
|
@@ -1182,29 +1638,43 @@ class ModelToComponentFactory:
|
|
1182
1638
|
return NoAuth(parameters=model.parameters or {})
|
1183
1639
|
|
1184
1640
|
@staticmethod
|
1185
|
-
def create_no_pagination(
|
1641
|
+
def create_no_pagination(
|
1642
|
+
model: NoPaginationModel, config: Config, **kwargs: Any
|
1643
|
+
) -> NoPagination:
|
1186
1644
|
return NoPagination(parameters={})
|
1187
1645
|
|
1188
|
-
def create_oauth_authenticator(
|
1646
|
+
def create_oauth_authenticator(
|
1647
|
+
self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
|
1648
|
+
) -> DeclarativeOauth2Authenticator:
|
1189
1649
|
if model.refresh_token_updater:
|
1190
1650
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
1191
1651
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
|
1192
1652
|
config,
|
1193
|
-
InterpolatedString.create(
|
1653
|
+
InterpolatedString.create(
|
1654
|
+
model.token_refresh_endpoint, parameters=model.parameters or {}
|
1655
|
+
).eval(config),
|
1194
1656
|
access_token_name=InterpolatedString.create(
|
1195
1657
|
model.access_token_name or "access_token", parameters=model.parameters or {}
|
1196
1658
|
).eval(config),
|
1197
1659
|
refresh_token_name=model.refresh_token_updater.refresh_token_name,
|
1198
|
-
expires_in_name=InterpolatedString.create(
|
1199
|
-
|
1200
|
-
),
|
1201
|
-
client_id=InterpolatedString.create(
|
1202
|
-
|
1660
|
+
expires_in_name=InterpolatedString.create(
|
1661
|
+
model.expires_in_name or "expires_in", parameters=model.parameters or {}
|
1662
|
+
).eval(config),
|
1663
|
+
client_id=InterpolatedString.create(
|
1664
|
+
model.client_id, parameters=model.parameters or {}
|
1665
|
+
).eval(config),
|
1666
|
+
client_secret=InterpolatedString.create(
|
1667
|
+
model.client_secret, parameters=model.parameters or {}
|
1668
|
+
).eval(config),
|
1203
1669
|
access_token_config_path=model.refresh_token_updater.access_token_config_path,
|
1204
1670
|
refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
|
1205
1671
|
token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
|
1206
|
-
grant_type=InterpolatedString.create(
|
1207
|
-
|
1672
|
+
grant_type=InterpolatedString.create(
|
1673
|
+
model.grant_type or "refresh_token", parameters=model.parameters or {}
|
1674
|
+
).eval(config),
|
1675
|
+
refresh_request_body=InterpolatedMapping(
|
1676
|
+
model.refresh_request_body or {}, parameters=model.parameters or {}
|
1677
|
+
).eval(config),
|
1208
1678
|
scopes=model.scopes,
|
1209
1679
|
token_expiry_date_format=model.token_expiry_date_format,
|
1210
1680
|
message_repository=self._message_repository,
|
@@ -1232,7 +1702,9 @@ class ModelToComponentFactory:
|
|
1232
1702
|
)
|
1233
1703
|
|
1234
1704
|
@staticmethod
|
1235
|
-
def create_offset_increment(
|
1705
|
+
def create_offset_increment(
|
1706
|
+
model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
|
1707
|
+
) -> OffsetIncrement:
|
1236
1708
|
if isinstance(decoder, PaginationDecoderDecorator):
|
1237
1709
|
if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
|
1238
1710
|
raise ValueError(
|
@@ -1241,7 +1713,9 @@ class ModelToComponentFactory:
|
|
1241
1713
|
decoder_to_use = decoder
|
1242
1714
|
else:
|
1243
1715
|
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
1244
|
-
raise ValueError(
|
1716
|
+
raise ValueError(
|
1717
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
1718
|
+
)
|
1245
1719
|
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
1246
1720
|
return OffsetIncrement(
|
1247
1721
|
page_size=model.page_size,
|
@@ -1252,7 +1726,9 @@ class ModelToComponentFactory:
|
|
1252
1726
|
)
|
1253
1727
|
|
1254
1728
|
@staticmethod
|
1255
|
-
def create_page_increment(
|
1729
|
+
def create_page_increment(
|
1730
|
+
model: PageIncrementModel, config: Config, **kwargs: Any
|
1731
|
+
) -> PageIncrement:
|
1256
1732
|
return PageIncrement(
|
1257
1733
|
page_size=model.page_size,
|
1258
1734
|
config=config,
|
@@ -1261,9 +1737,15 @@ class ModelToComponentFactory:
|
|
1261
1737
|
parameters=model.parameters or {},
|
1262
1738
|
)
|
1263
1739
|
|
1264
|
-
def create_parent_stream_config(
|
1740
|
+
def create_parent_stream_config(
|
1741
|
+
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
1742
|
+
) -> ParentStreamConfig:
|
1265
1743
|
declarative_stream = self._create_component_from_model(model.stream, config=config)
|
1266
|
-
request_option =
|
1744
|
+
request_option = (
|
1745
|
+
self._create_component_from_model(model.request_option, config=config)
|
1746
|
+
if model.request_option
|
1747
|
+
else None
|
1748
|
+
)
|
1267
1749
|
return ParentStreamConfig(
|
1268
1750
|
parent_key=model.parent_key,
|
1269
1751
|
request_option=request_option,
|
@@ -1276,15 +1758,21 @@ class ModelToComponentFactory:
|
|
1276
1758
|
)
|
1277
1759
|
|
1278
1760
|
@staticmethod
|
1279
|
-
def create_record_filter(
|
1280
|
-
|
1761
|
+
def create_record_filter(
|
1762
|
+
model: RecordFilterModel, config: Config, **kwargs: Any
|
1763
|
+
) -> RecordFilter:
|
1764
|
+
return RecordFilter(
|
1765
|
+
condition=model.condition or "", config=config, parameters=model.parameters or {}
|
1766
|
+
)
|
1281
1767
|
|
1282
1768
|
@staticmethod
|
1283
1769
|
def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
|
1284
1770
|
return RequestPath(parameters={})
|
1285
1771
|
|
1286
1772
|
@staticmethod
|
1287
|
-
def create_request_option(
|
1773
|
+
def create_request_option(
|
1774
|
+
model: RequestOptionModel, config: Config, **kwargs: Any
|
1775
|
+
) -> RequestOption:
|
1288
1776
|
inject_into = RequestOptionType(model.inject_into.value)
|
1289
1777
|
return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
|
1290
1778
|
|
@@ -1299,16 +1787,26 @@ class ModelToComponentFactory:
|
|
1299
1787
|
**kwargs: Any,
|
1300
1788
|
) -> RecordSelector:
|
1301
1789
|
assert model.schema_normalization is not None # for mypy
|
1302
|
-
extractor = self._create_component_from_model(
|
1303
|
-
|
1790
|
+
extractor = self._create_component_from_model(
|
1791
|
+
model=model.extractor, decoder=decoder, config=config
|
1792
|
+
)
|
1793
|
+
record_filter = (
|
1794
|
+
self._create_component_from_model(model.record_filter, config=config)
|
1795
|
+
if model.record_filter
|
1796
|
+
else None
|
1797
|
+
)
|
1304
1798
|
if client_side_incremental_sync:
|
1305
1799
|
record_filter = ClientSideIncrementalRecordFilterDecorator(
|
1306
1800
|
config=config,
|
1307
1801
|
parameters=model.parameters,
|
1308
|
-
condition=model.record_filter.condition
|
1802
|
+
condition=model.record_filter.condition
|
1803
|
+
if (model.record_filter and hasattr(model.record_filter, "condition"))
|
1804
|
+
else None,
|
1309
1805
|
**client_side_incremental_sync,
|
1310
1806
|
)
|
1311
|
-
schema_normalization = TypeTransformer(
|
1807
|
+
schema_normalization = TypeTransformer(
|
1808
|
+
SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]
|
1809
|
+
)
|
1312
1810
|
|
1313
1811
|
return RecordSelector(
|
1314
1812
|
extractor=extractor,
|
@@ -1320,11 +1818,20 @@ class ModelToComponentFactory:
|
|
1320
1818
|
)
|
1321
1819
|
|
1322
1820
|
@staticmethod
|
1323
|
-
def create_remove_fields(
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
|
1821
|
+
def create_remove_fields(
|
1822
|
+
model: RemoveFieldsModel, config: Config, **kwargs: Any
|
1823
|
+
) -> RemoveFields:
|
1824
|
+
return RemoveFields(
|
1825
|
+
field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
|
1826
|
+
)
|
1827
|
+
|
1828
|
+
def create_selective_authenticator(
|
1829
|
+
self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
|
1830
|
+
) -> DeclarativeAuthenticator:
|
1831
|
+
authenticators = {
|
1832
|
+
name: self._create_component_from_model(model=auth, config=config)
|
1833
|
+
for name, auth in model.authenticators.items()
|
1834
|
+
}
|
1328
1835
|
# SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error
|
1329
1836
|
return SelectiveAuthenticator( # type: ignore[abstract]
|
1330
1837
|
config=config,
|
@@ -1363,8 +1870,14 @@ class ModelToComponentFactory:
|
|
1363
1870
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
1364
1871
|
transformations: List[RecordTransformation],
|
1365
1872
|
) -> SimpleRetriever:
|
1366
|
-
decoder =
|
1367
|
-
|
1873
|
+
decoder = (
|
1874
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
1875
|
+
if model.decoder
|
1876
|
+
else JsonDecoder(parameters={})
|
1877
|
+
)
|
1878
|
+
requester = self._create_component_from_model(
|
1879
|
+
model=model.requester, decoder=decoder, config=config, name=name
|
1880
|
+
)
|
1368
1881
|
record_selector = self._create_component_from_model(
|
1369
1882
|
model=model.record_selector,
|
1370
1883
|
config=config,
|
@@ -1372,12 +1885,19 @@ class ModelToComponentFactory:
|
|
1372
1885
|
transformations=transformations,
|
1373
1886
|
client_side_incremental_sync=client_side_incremental_sync,
|
1374
1887
|
)
|
1375
|
-
url_base =
|
1888
|
+
url_base = (
|
1889
|
+
model.requester.url_base
|
1890
|
+
if hasattr(model.requester, "url_base")
|
1891
|
+
else requester.get_url_base()
|
1892
|
+
)
|
1376
1893
|
|
1377
1894
|
# Define cursor only if per partition or common incremental support is needed
|
1378
1895
|
cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
|
1379
1896
|
|
1380
|
-
if
|
1897
|
+
if (
|
1898
|
+
not isinstance(stream_slicer, DatetimeBasedCursor)
|
1899
|
+
or type(stream_slicer) is not DatetimeBasedCursor
|
1900
|
+
):
|
1381
1901
|
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
|
1382
1902
|
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
|
1383
1903
|
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
|
@@ -1401,7 +1921,9 @@ class ModelToComponentFactory:
|
|
1401
1921
|
else NoPagination(parameters={})
|
1402
1922
|
)
|
1403
1923
|
|
1404
|
-
ignore_stream_slicer_parameters_on_paginated_requests =
|
1924
|
+
ignore_stream_slicer_parameters_on_paginated_requests = (
|
1925
|
+
model.ignore_stream_slicer_parameters_on_paginated_requests or False
|
1926
|
+
)
|
1405
1927
|
|
1406
1928
|
if self._limit_slices_fetched or self._emit_connector_builder_messages:
|
1407
1929
|
return SimpleRetrieverTestReadDecorator(
|
@@ -1468,14 +1990,19 @@ class ModelToComponentFactory:
|
|
1468
1990
|
config: Config,
|
1469
1991
|
*,
|
1470
1992
|
name: str,
|
1471
|
-
primary_key: Optional[
|
1993
|
+
primary_key: Optional[
|
1994
|
+
Union[str, List[str], List[List[str]]]
|
1995
|
+
], # this seems to be needed to match create_simple_retriever
|
1472
1996
|
stream_slicer: Optional[StreamSlicer],
|
1473
1997
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
1474
1998
|
transformations: List[RecordTransformation],
|
1475
1999
|
**kwargs: Any,
|
1476
2000
|
) -> AsyncRetriever:
|
1477
|
-
|
1478
|
-
|
2001
|
+
decoder = (
|
2002
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
2003
|
+
if model.decoder
|
2004
|
+
else JsonDecoder(parameters={})
|
2005
|
+
)
|
1479
2006
|
record_selector = self._create_component_from_model(
|
1480
2007
|
model=model.record_selector,
|
1481
2008
|
config=config,
|
@@ -1485,14 +2012,23 @@ class ModelToComponentFactory:
|
|
1485
2012
|
)
|
1486
2013
|
stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
|
1487
2014
|
creation_requester = self._create_component_from_model(
|
1488
|
-
model=model.creation_requester,
|
2015
|
+
model=model.creation_requester,
|
2016
|
+
decoder=decoder,
|
2017
|
+
config=config,
|
2018
|
+
name=f"job creation - {name}",
|
1489
2019
|
)
|
1490
2020
|
polling_requester = self._create_component_from_model(
|
1491
|
-
model=model.polling_requester,
|
2021
|
+
model=model.polling_requester,
|
2022
|
+
decoder=decoder,
|
2023
|
+
config=config,
|
2024
|
+
name=f"job polling - {name}",
|
1492
2025
|
)
|
1493
2026
|
job_download_components_name = f"job download - {name}"
|
1494
2027
|
download_requester = self._create_component_from_model(
|
1495
|
-
model=model.download_requester,
|
2028
|
+
model=model.download_requester,
|
2029
|
+
decoder=decoder,
|
2030
|
+
config=config,
|
2031
|
+
name=job_download_components_name,
|
1496
2032
|
)
|
1497
2033
|
download_retriever = SimpleRetriever(
|
1498
2034
|
requester=download_requester,
|
@@ -1507,7 +2043,9 @@ class ModelToComponentFactory:
|
|
1507
2043
|
primary_key=None,
|
1508
2044
|
name=job_download_components_name,
|
1509
2045
|
paginator=(
|
1510
|
-
self._create_component_from_model(
|
2046
|
+
self._create_component_from_model(
|
2047
|
+
model=model.download_paginator, decoder=decoder, config=config, url_base=""
|
2048
|
+
)
|
1511
2049
|
if model.download_paginator
|
1512
2050
|
else NoPagination(parameters={})
|
1513
2051
|
),
|
@@ -1515,17 +2053,31 @@ class ModelToComponentFactory:
|
|
1515
2053
|
parameters={},
|
1516
2054
|
)
|
1517
2055
|
abort_requester = (
|
1518
|
-
self._create_component_from_model(
|
2056
|
+
self._create_component_from_model(
|
2057
|
+
model=model.abort_requester,
|
2058
|
+
decoder=decoder,
|
2059
|
+
config=config,
|
2060
|
+
name=f"job abort - {name}",
|
2061
|
+
)
|
1519
2062
|
if model.abort_requester
|
1520
2063
|
else None
|
1521
2064
|
)
|
1522
2065
|
delete_requester = (
|
1523
|
-
self._create_component_from_model(
|
2066
|
+
self._create_component_from_model(
|
2067
|
+
model=model.delete_requester,
|
2068
|
+
decoder=decoder,
|
2069
|
+
config=config,
|
2070
|
+
name=f"job delete - {name}",
|
2071
|
+
)
|
1524
2072
|
if model.delete_requester
|
1525
2073
|
else None
|
1526
2074
|
)
|
1527
|
-
status_extractor = self._create_component_from_model(
|
1528
|
-
|
2075
|
+
status_extractor = self._create_component_from_model(
|
2076
|
+
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2077
|
+
)
|
2078
|
+
urls_extractor = self._create_component_from_model(
|
2079
|
+
model=model.urls_extractor, decoder=decoder, config=config, name=name
|
2080
|
+
)
|
1529
2081
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
1530
2082
|
creation_requester=creation_requester,
|
1531
2083
|
polling_requester=polling_requester,
|
@@ -1541,7 +2093,9 @@ class ModelToComponentFactory:
|
|
1541
2093
|
job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
|
1542
2094
|
job_repository,
|
1543
2095
|
stream_slices,
|
1544
|
-
JobTracker(
|
2096
|
+
JobTracker(
|
2097
|
+
1
|
2098
|
+
), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
1545
2099
|
self._message_repository,
|
1546
2100
|
has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
|
1547
2101
|
),
|
@@ -1567,14 +2121,22 @@ class ModelToComponentFactory:
|
|
1567
2121
|
if model.parent_stream_configs:
|
1568
2122
|
parent_stream_configs.extend(
|
1569
2123
|
[
|
1570
|
-
self._create_message_repository_substream_wrapper(
|
2124
|
+
self._create_message_repository_substream_wrapper(
|
2125
|
+
model=parent_stream_config, config=config
|
2126
|
+
)
|
1571
2127
|
for parent_stream_config in model.parent_stream_configs
|
1572
2128
|
]
|
1573
2129
|
)
|
1574
2130
|
|
1575
|
-
return SubstreamPartitionRouter(
|
2131
|
+
return SubstreamPartitionRouter(
|
2132
|
+
parent_stream_configs=parent_stream_configs,
|
2133
|
+
parameters=model.parameters or {},
|
2134
|
+
config=config,
|
2135
|
+
)
|
1576
2136
|
|
1577
|
-
def _create_message_repository_substream_wrapper(
|
2137
|
+
def _create_message_repository_substream_wrapper(
|
2138
|
+
self, model: ParentStreamConfigModel, config: Config
|
2139
|
+
) -> Any:
|
1578
2140
|
substream_factory = ModelToComponentFactory(
|
1579
2141
|
limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
|
1580
2142
|
limit_slices_fetched=self._limit_slices_fetched,
|
@@ -1590,13 +2152,17 @@ class ModelToComponentFactory:
|
|
1590
2152
|
return substream_factory._create_component_from_model(model=model, config=config)
|
1591
2153
|
|
1592
2154
|
@staticmethod
|
1593
|
-
def create_wait_time_from_header(
|
2155
|
+
def create_wait_time_from_header(
|
2156
|
+
model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
|
2157
|
+
) -> WaitTimeFromHeaderBackoffStrategy:
|
1594
2158
|
return WaitTimeFromHeaderBackoffStrategy(
|
1595
2159
|
header=model.header,
|
1596
2160
|
parameters=model.parameters or {},
|
1597
2161
|
config=config,
|
1598
2162
|
regex=model.regex,
|
1599
|
-
max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
|
2163
|
+
max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
|
2164
|
+
if model.max_waiting_time_in_seconds is not None
|
2165
|
+
else None,
|
1600
2166
|
)
|
1601
2167
|
|
1602
2168
|
@staticmethod
|
@@ -1604,7 +2170,11 @@ class ModelToComponentFactory:
|
|
1604
2170
|
model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
|
1605
2171
|
) -> WaitUntilTimeFromHeaderBackoffStrategy:
|
1606
2172
|
return WaitUntilTimeFromHeaderBackoffStrategy(
|
1607
|
-
header=model.header,
|
2173
|
+
header=model.header,
|
2174
|
+
parameters=model.parameters or {},
|
2175
|
+
config=config,
|
2176
|
+
min_wait=model.min_wait,
|
2177
|
+
regex=model.regex,
|
1608
2178
|
)
|
1609
2179
|
|
1610
2180
|
def get_message_repository(self) -> MessageRepository:
|