airbyte-cdk 0.72.1__py3-none-any.whl → 6.17.1.dev1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/__init__.py +355 -6
- airbyte_cdk/cli/__init__.py +1 -0
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +5 -0
- airbyte_cdk/cli/source_declarative_manifest/_run.py +230 -0
- airbyte_cdk/cli/source_declarative_manifest/spec.json +17 -0
- airbyte_cdk/config_observation.py +29 -10
- airbyte_cdk/connector.py +24 -24
- airbyte_cdk/connector_builder/README.md +53 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +37 -11
- airbyte_cdk/connector_builder/main.py +45 -13
- airbyte_cdk/connector_builder/message_grouper.py +189 -50
- airbyte_cdk/connector_builder/models.py +3 -2
- airbyte_cdk/destinations/__init__.py +4 -3
- airbyte_cdk/destinations/destination.py +54 -20
- airbyte_cdk/destinations/vector_db_based/README.md +37 -0
- airbyte_cdk/destinations/vector_db_based/config.py +40 -17
- airbyte_cdk/destinations/vector_db_based/document_processor.py +56 -17
- airbyte_cdk/destinations/vector_db_based/embedder.py +57 -15
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +24 -5
- airbyte_cdk/entrypoint.py +153 -44
- airbyte_cdk/exception_handler.py +21 -3
- airbyte_cdk/logger.py +30 -44
- airbyte_cdk/models/__init__.py +13 -2
- airbyte_cdk/models/airbyte_protocol.py +86 -1
- airbyte_cdk/models/airbyte_protocol_serializers.py +44 -0
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/__init__.py +5 -1
- airbyte_cdk/sources/abstract_source.py +125 -79
- airbyte_cdk/sources/concurrent_source/__init__.py +7 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +102 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +29 -36
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +94 -10
- airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +25 -0
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +20 -14
- airbyte_cdk/sources/config.py +3 -2
- airbyte_cdk/sources/connector_state_manager.py +49 -83
- airbyte_cdk/sources/declarative/async_job/job.py +52 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +497 -0
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +75 -0
- airbyte_cdk/sources/declarative/async_job/repository.py +35 -0
- airbyte_cdk/sources/declarative/async_job/status.py +24 -0
- airbyte_cdk/sources/declarative/async_job/timer.py +39 -0
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -3
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +191 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +60 -20
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +10 -2
- airbyte_cdk/sources/declarative/auth/token.py +28 -10
- airbyte_cdk/sources/declarative/auth/token_provider.py +9 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +16 -8
- airbyte_cdk/sources/declarative/checks/connection_checker.py +4 -2
- airbyte_cdk/sources/declarative/concurrency_level/__init__.py +7 -0
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +50 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +490 -0
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +26 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1185 -85
- airbyte_cdk/sources/declarative/declarative_source.py +5 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +95 -9
- airbyte_cdk/sources/declarative/decoders/__init__.py +23 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/decoders/decoder.py +11 -4
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +92 -5
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +21 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +39 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +98 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +12 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +29 -24
- airbyte_cdk/sources/declarative/extractors/http_selector.py +4 -5
- airbyte_cdk/sources/declarative/extractors/record_extractor.py +2 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +63 -8
- airbyte_cdk/sources/declarative/extractors/record_selector.py +85 -26
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +177 -0
- airbyte_cdk/sources/declarative/extractors/type_transformer.py +55 -0
- airbyte_cdk/sources/declarative/incremental/__init__.py +31 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +340 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +156 -48
- airbyte_cdk/sources/declarative/incremental/declarative_cursor.py +13 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +350 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +174 -74
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +200 -0
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +122 -0
- airbyte_cdk/sources/declarative/interpolation/filters.py +27 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +23 -5
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +12 -8
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +13 -6
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +21 -6
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +9 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +72 -37
- airbyte_cdk/sources/declarative/interpolation/macros.py +72 -17
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +193 -52
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +98 -0
- airbyte_cdk/sources/declarative/migrations/state_migration.py +24 -0
- airbyte_cdk/sources/declarative/models/__init__.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1319 -603
- airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +2 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +26 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +26 -15
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +1759 -225
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +24 -4
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +176 -0
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +39 -9
- airbyte_cdk/sources/declarative/partition_routers/partition_router.py +62 -0
- airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +15 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +222 -39
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +19 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +4 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +41 -9
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +29 -14
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +5 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +32 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +46 -56
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +40 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +6 -32
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +119 -41
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +228 -0
- airbyte_cdk/sources/declarative/requesters/http_requester.py +98 -344
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +105 -46
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +14 -8
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +19 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +53 -21
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +42 -19
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +25 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +13 -10
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +26 -13
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +15 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +91 -0
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +60 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +31 -14
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +27 -15
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +63 -10
- airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +9 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +41 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +112 -0
- airbyte_cdk/sources/declarative/retrievers/__init__.py +6 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +100 -0
- airbyte_cdk/sources/declarative/retrievers/retriever.py +1 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +229 -73
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -1
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +5 -3
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +236 -0
- airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +8 -8
- airbyte_cdk/sources/declarative/spec/spec.py +12 -5
- airbyte_cdk/sources/declarative/stream_slicers/__init__.py +1 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +88 -0
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +9 -14
- airbyte_cdk/sources/declarative/transformations/add_fields.py +19 -11
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +52 -0
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +22 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +13 -10
- airbyte_cdk/sources/declarative/transformations/transformation.py +5 -5
- airbyte_cdk/sources/declarative/types.py +19 -110
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +31 -10
- airbyte_cdk/sources/embedded/base_integration.py +16 -5
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +5 -2
- airbyte_cdk/sources/file_based/README.md +152 -0
- airbyte_cdk/sources/file_based/__init__.py +24 -0
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +22 -6
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +46 -10
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +47 -10
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +29 -10
- airbyte_cdk/sources/file_based/config/excel_format.py +18 -0
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +16 -4
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +13 -5
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +18 -15
- airbyte_cdk/sources/file_based/file_based_source.py +140 -33
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +69 -5
- airbyte_cdk/sources/file_based/file_types/__init__.py +14 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +75 -24
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +116 -34
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +196 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +37 -0
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +24 -8
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +60 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +141 -41
- airbyte_cdk/sources/file_based/remote_file.py +1 -1
- airbyte_cdk/sources/file_based/schema_helpers.py +38 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +50 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +67 -27
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +14 -23
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +54 -18
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +21 -9
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +27 -10
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +147 -45
- airbyte_cdk/sources/http_logger.py +8 -3
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +42 -38
- airbyte_cdk/sources/streams/__init__.py +2 -2
- airbyte_cdk/sources/streams/availability_strategy.py +54 -3
- airbyte_cdk/sources/streams/call_rate.py +64 -21
- airbyte_cdk/sources/streams/checkpoint/__init__.py +26 -0
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +335 -0
- airbyte_cdk/sources/{declarative/incremental → streams/checkpoint}/cursor.py +17 -14
- airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +22 -0
- airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +51 -0
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +110 -0
- airbyte_cdk/sources/streams/concurrent/README.md +7 -0
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +7 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +84 -75
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +313 -48
- airbyte_cdk/sources/streams/concurrent/default_stream.py +12 -3
- airbyte_cdk/sources/streams/concurrent/exceptions.py +3 -0
- airbyte_cdk/sources/streams/concurrent/helpers.py +14 -3
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +12 -3
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +10 -3
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -16
- airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +21 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +15 -5
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +109 -17
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +90 -72
- airbyte_cdk/sources/streams/core.py +412 -87
- airbyte_cdk/sources/streams/http/__init__.py +2 -1
- airbyte_cdk/sources/streams/http/availability_strategy.py +12 -101
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +22 -0
- airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +28 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +17 -0
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +86 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +42 -0
- airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +19 -0
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +110 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +52 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +65 -0
- airbyte_cdk/sources/streams/http/exceptions.py +27 -7
- airbyte_cdk/sources/streams/http/http.py +369 -246
- airbyte_cdk/sources/streams/http/http_client.py +531 -0
- airbyte_cdk/sources/streams/http/rate_limiting.py +76 -12
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -9
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +90 -35
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +154 -0
- airbyte_cdk/sources/utils/record_helper.py +36 -21
- airbyte_cdk/sources/utils/schema_helpers.py +13 -6
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +54 -20
- airbyte_cdk/sql/_util/hashing.py +34 -0
- airbyte_cdk/sql/_util/name_normalizers.py +92 -0
- airbyte_cdk/sql/constants.py +32 -0
- airbyte_cdk/sql/exceptions.py +235 -0
- airbyte_cdk/sql/secrets.py +123 -0
- airbyte_cdk/sql/shared/__init__.py +15 -0
- airbyte_cdk/sql/shared/catalog_providers.py +145 -0
- airbyte_cdk/sql/shared/sql_processor.py +786 -0
- airbyte_cdk/sql/types.py +160 -0
- airbyte_cdk/test/catalog_builder.py +70 -18
- airbyte_cdk/test/entrypoint_wrapper.py +117 -42
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/matcher.py +6 -0
- airbyte_cdk/test/mock_http/mocker.py +57 -10
- airbyte_cdk/test/mock_http/request.py +19 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +32 -16
- airbyte_cdk/test/state_builder.py +18 -10
- airbyte_cdk/test/utils/__init__.py +1 -0
- airbyte_cdk/test/utils/data.py +24 -0
- airbyte_cdk/test/utils/http_mocking.py +16 -0
- airbyte_cdk/test/utils/manifest_only_fixtures.py +60 -0
- airbyte_cdk/test/utils/reading.py +26 -0
- airbyte_cdk/utils/__init__.py +2 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +5 -3
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +20 -11
- airbyte_cdk/utils/print_buffer.py +75 -0
- airbyte_cdk/utils/schema_inferrer.py +198 -28
- airbyte_cdk/utils/slice_hasher.py +30 -0
- airbyte_cdk/utils/spec_schema_transformations.py +6 -3
- airbyte_cdk/utils/stream_status_utils.py +8 -1
- airbyte_cdk/utils/traced_exception.py +61 -21
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA +109 -0
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD +350 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/WHEEL +1 -2
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt +3 -0
- airbyte_cdk/sources/declarative/create_partial.py +0 -92
- airbyte_cdk/sources/declarative/parsers/class_types_registry.py +0 -102
- airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py +0 -64
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_action.py +0 -16
- airbyte_cdk/sources/declarative/requesters/error_handlers/response_status.py +0 -68
- airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +0 -114
- airbyte_cdk/sources/deprecated/base_source.py +0 -94
- airbyte_cdk/sources/deprecated/client.py +0 -99
- airbyte_cdk/sources/singer/__init__.py +0 -8
- airbyte_cdk/sources/singer/singer_helpers.py +0 -304
- airbyte_cdk/sources/singer/source.py +0 -186
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -23
- airbyte_cdk/sources/streams/http/auth/__init__.py +0 -17
- airbyte_cdk/sources/streams/http/auth/core.py +0 -29
- airbyte_cdk/sources/streams/http/auth/oauth.py +0 -113
- airbyte_cdk/sources/streams/http/auth/token.py +0 -47
- airbyte_cdk/sources/streams/utils/stream_helper.py +0 -40
- airbyte_cdk/sources/utils/catalog_helpers.py +0 -22
- airbyte_cdk/sources/utils/schema_models.py +0 -84
- airbyte_cdk-0.72.1.dist-info/METADATA +0 -243
- airbyte_cdk-0.72.1.dist-info/RECORD +0 -466
- airbyte_cdk-0.72.1.dist-info/top_level.txt +0 -3
- source_declarative_manifest/main.py +0 -29
- unit_tests/connector_builder/__init__.py +0 -3
- unit_tests/connector_builder/test_connector_builder_handler.py +0 -871
- unit_tests/connector_builder/test_message_grouper.py +0 -713
- unit_tests/connector_builder/utils.py +0 -27
- unit_tests/destinations/test_destination.py +0 -243
- unit_tests/singer/test_singer_helpers.py +0 -56
- unit_tests/singer/test_singer_source.py +0 -112
- unit_tests/sources/__init__.py +0 -0
- unit_tests/sources/concurrent_source/__init__.py +0 -3
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +0 -106
- unit_tests/sources/declarative/__init__.py +0 -3
- unit_tests/sources/declarative/auth/__init__.py +0 -3
- unit_tests/sources/declarative/auth/test_oauth.py +0 -331
- unit_tests/sources/declarative/auth/test_selective_authenticator.py +0 -39
- unit_tests/sources/declarative/auth/test_session_token_auth.py +0 -182
- unit_tests/sources/declarative/auth/test_token_auth.py +0 -200
- unit_tests/sources/declarative/auth/test_token_provider.py +0 -73
- unit_tests/sources/declarative/checks/__init__.py +0 -3
- unit_tests/sources/declarative/checks/test_check_stream.py +0 -146
- unit_tests/sources/declarative/decoders/__init__.py +0 -0
- unit_tests/sources/declarative/decoders/test_json_decoder.py +0 -16
- unit_tests/sources/declarative/external_component.py +0 -13
- unit_tests/sources/declarative/extractors/__init__.py +0 -3
- unit_tests/sources/declarative/extractors/test_dpath_extractor.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_filter.py +0 -55
- unit_tests/sources/declarative/extractors/test_record_selector.py +0 -179
- unit_tests/sources/declarative/incremental/__init__.py +0 -0
- unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +0 -860
- unit_tests/sources/declarative/incremental/test_per_partition_cursor.py +0 -406
- unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +0 -332
- unit_tests/sources/declarative/interpolation/__init__.py +0 -3
- unit_tests/sources/declarative/interpolation/test_filters.py +0 -80
- unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +0 -40
- unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +0 -35
- unit_tests/sources/declarative/interpolation/test_interpolated_nested_mapping.py +0 -45
- unit_tests/sources/declarative/interpolation/test_interpolated_string.py +0 -25
- unit_tests/sources/declarative/interpolation/test_jinja.py +0 -240
- unit_tests/sources/declarative/interpolation/test_macros.py +0 -73
- unit_tests/sources/declarative/parsers/__init__.py +0 -3
- unit_tests/sources/declarative/parsers/test_manifest_component_transformer.py +0 -406
- unit_tests/sources/declarative/parsers/test_manifest_reference_resolver.py +0 -139
- unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +0 -1847
- unit_tests/sources/declarative/parsers/testing_components.py +0 -36
- unit_tests/sources/declarative/partition_routers/__init__.py +0 -3
- unit_tests/sources/declarative/partition_routers/test_list_partition_router.py +0 -155
- unit_tests/sources/declarative/partition_routers/test_single_partition_router.py +0 -14
- unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +0 -404
- unit_tests/sources/declarative/requesters/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_constant_backoff.py +0 -34
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +0 -36
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_header_helper.py +0 -38
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +0 -35
- unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +0 -64
- unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +0 -213
- unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +0 -178
- unit_tests/sources/declarative/requesters/error_handlers/test_http_response_filter.py +0 -121
- unit_tests/sources/declarative/requesters/error_handlers/test_response_status.py +0 -44
- unit_tests/sources/declarative/requesters/paginators/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +0 -64
- unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +0 -313
- unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +0 -12
- unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +0 -58
- unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +0 -70
- unit_tests/sources/declarative/requesters/paginators/test_request_option.py +0 -43
- unit_tests/sources/declarative/requesters/paginators/test_stop_condition.py +0 -105
- unit_tests/sources/declarative/requesters/request_options/__init__.py +0 -3
- unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +0 -101
- unit_tests/sources/declarative/requesters/test_http_requester.py +0 -974
- unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +0 -32
- unit_tests/sources/declarative/retrievers/__init__.py +0 -3
- unit_tests/sources/declarative/retrievers/test_simple_retriever.py +0 -542
- unit_tests/sources/declarative/schema/__init__.py +0 -6
- unit_tests/sources/declarative/schema/source_test/SourceTest.py +0 -8
- unit_tests/sources/declarative/schema/source_test/__init__.py +0 -3
- unit_tests/sources/declarative/schema/test_default_schema_loader.py +0 -32
- unit_tests/sources/declarative/schema/test_inline_schema_loader.py +0 -19
- unit_tests/sources/declarative/schema/test_json_file_schema_loader.py +0 -26
- unit_tests/sources/declarative/states/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/__init__.py +0 -3
- unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +0 -225
- unit_tests/sources/declarative/test_create_partial.py +0 -83
- unit_tests/sources/declarative/test_declarative_stream.py +0 -103
- unit_tests/sources/declarative/test_manifest_declarative_source.py +0 -1260
- unit_tests/sources/declarative/test_types.py +0 -39
- unit_tests/sources/declarative/test_yaml_declarative_source.py +0 -148
- unit_tests/sources/file_based/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/__init__.py +0 -0
- unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +0 -100
- unit_tests/sources/file_based/config/__init__.py +0 -0
- unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +0 -28
- unit_tests/sources/file_based/config/test_csv_format.py +0 -34
- unit_tests/sources/file_based/config/test_file_based_stream_config.py +0 -84
- unit_tests/sources/file_based/discovery_policy/__init__.py +0 -0
- unit_tests/sources/file_based/discovery_policy/test_default_discovery_policy.py +0 -31
- unit_tests/sources/file_based/file_types/__init__.py +0 -0
- unit_tests/sources/file_based/file_types/test_avro_parser.py +0 -243
- unit_tests/sources/file_based/file_types/test_csv_parser.py +0 -546
- unit_tests/sources/file_based/file_types/test_jsonl_parser.py +0 -158
- unit_tests/sources/file_based/file_types/test_parquet_parser.py +0 -274
- unit_tests/sources/file_based/file_types/test_unstructured_parser.py +0 -593
- unit_tests/sources/file_based/helpers.py +0 -70
- unit_tests/sources/file_based/in_memory_files_source.py +0 -211
- unit_tests/sources/file_based/scenarios/__init__.py +0 -0
- unit_tests/sources/file_based/scenarios/avro_scenarios.py +0 -744
- unit_tests/sources/file_based/scenarios/check_scenarios.py +0 -220
- unit_tests/sources/file_based/scenarios/concurrent_incremental_scenarios.py +0 -2844
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +0 -3105
- unit_tests/sources/file_based/scenarios/file_based_source_builder.py +0 -91
- unit_tests/sources/file_based/scenarios/incremental_scenarios.py +0 -1926
- unit_tests/sources/file_based/scenarios/jsonl_scenarios.py +0 -930
- unit_tests/sources/file_based/scenarios/parquet_scenarios.py +0 -754
- unit_tests/sources/file_based/scenarios/scenario_builder.py +0 -234
- unit_tests/sources/file_based/scenarios/unstructured_scenarios.py +0 -608
- unit_tests/sources/file_based/scenarios/user_input_schema_scenarios.py +0 -746
- unit_tests/sources/file_based/scenarios/validation_policy_scenarios.py +0 -726
- unit_tests/sources/file_based/stream/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/__init__.py +0 -0
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +0 -362
- unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +0 -458
- unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +0 -310
- unit_tests/sources/file_based/stream/test_default_file_based_stream.py +0 -244
- unit_tests/sources/file_based/test_file_based_scenarios.py +0 -320
- unit_tests/sources/file_based/test_file_based_stream_reader.py +0 -272
- unit_tests/sources/file_based/test_scenarios.py +0 -253
- unit_tests/sources/file_based/test_schema_helpers.py +0 -346
- unit_tests/sources/fixtures/__init__.py +0 -3
- unit_tests/sources/fixtures/source_test_fixture.py +0 -153
- unit_tests/sources/message/__init__.py +0 -0
- unit_tests/sources/message/test_repository.py +0 -153
- unit_tests/sources/streams/__init__.py +0 -0
- unit_tests/sources/streams/concurrent/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/__init__.py +0 -3
- unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py +0 -250
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +0 -140
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py +0 -452
- unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py +0 -76
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +0 -418
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +0 -142
- unit_tests/sources/streams/concurrent/scenarios/utils.py +0 -55
- unit_tests/sources/streams/concurrent/test_adapters.py +0 -380
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +0 -684
- unit_tests/sources/streams/concurrent/test_cursor.py +0 -139
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +0 -369
- unit_tests/sources/streams/concurrent/test_default_stream.py +0 -197
- unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +0 -90
- unit_tests/sources/streams/concurrent/test_partition_reader.py +0 -67
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +0 -106
- unit_tests/sources/streams/http/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/__init__.py +0 -0
- unit_tests/sources/streams/http/auth/test_auth.py +0 -173
- unit_tests/sources/streams/http/requests_native_auth/__init__.py +0 -0
- unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +0 -423
- unit_tests/sources/streams/http/test_availability_strategy.py +0 -180
- unit_tests/sources/streams/http/test_http.py +0 -635
- unit_tests/sources/streams/test_availability_strategy.py +0 -70
- unit_tests/sources/streams/test_call_rate.py +0 -300
- unit_tests/sources/streams/test_stream_read.py +0 -405
- unit_tests/sources/streams/test_streams_core.py +0 -184
- unit_tests/sources/test_abstract_source.py +0 -1442
- unit_tests/sources/test_concurrent_source.py +0 -112
- unit_tests/sources/test_config.py +0 -92
- unit_tests/sources/test_connector_state_manager.py +0 -482
- unit_tests/sources/test_http_logger.py +0 -252
- unit_tests/sources/test_integration_source.py +0 -86
- unit_tests/sources/test_source.py +0 -684
- unit_tests/sources/test_source_read.py +0 -460
- unit_tests/test/__init__.py +0 -0
- unit_tests/test/mock_http/__init__.py +0 -0
- unit_tests/test/mock_http/test_matcher.py +0 -53
- unit_tests/test/mock_http/test_mocker.py +0 -214
- unit_tests/test/mock_http/test_request.py +0 -117
- unit_tests/test/mock_http/test_response_builder.py +0 -177
- unit_tests/test/test_entrypoint_wrapper.py +0 -240
- unit_tests/utils/__init__.py +0 -0
- unit_tests/utils/test_datetime_format_inferrer.py +0 -60
- unit_tests/utils/test_mapping_helpers.py +0 -54
- unit_tests/utils/test_message_utils.py +0 -91
- unit_tests/utils/test_rate_limiting.py +0 -26
- unit_tests/utils/test_schema_inferrer.py +0 -202
- unit_tests/utils/test_secret_utils.py +0 -135
- unit_tests/utils/test_stream_status_utils.py +0 -61
- unit_tests/utils/test_traced_exception.py +0 -107
- airbyte_cdk/sources/{deprecated → declarative/async_job}/__init__.py +0 -0
- {source_declarative_manifest → airbyte_cdk/sources/declarative/migrations}/__init__.py +0 -0
- {unit_tests/destinations → airbyte_cdk/sql}/__init__.py +0 -0
- {unit_tests/singer → airbyte_cdk/sql/_util}/__init__.py +0 -0
- {airbyte_cdk-0.72.1.dist-info → airbyte_cdk-6.17.1.dev1.dist-info}/LICENSE.txt +0 -0
@@ -4,15 +4,44 @@
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
|
+
import datetime
|
7
8
|
import importlib
|
8
9
|
import inspect
|
9
10
|
import re
|
10
|
-
from
|
11
|
+
from functools import partial
|
12
|
+
from typing import (
|
13
|
+
Any,
|
14
|
+
Callable,
|
15
|
+
Dict,
|
16
|
+
List,
|
17
|
+
Mapping,
|
18
|
+
MutableMapping,
|
19
|
+
Optional,
|
20
|
+
Type,
|
21
|
+
Union,
|
22
|
+
get_args,
|
23
|
+
get_origin,
|
24
|
+
get_type_hints,
|
25
|
+
)
|
11
26
|
|
12
|
-
from
|
13
|
-
from
|
14
|
-
|
15
|
-
from airbyte_cdk.
|
27
|
+
from isodate import parse_duration
|
28
|
+
from pydantic.v1 import BaseModel
|
29
|
+
|
30
|
+
from airbyte_cdk.models import FailureType, Level
|
31
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
32
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
|
33
|
+
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
|
34
|
+
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
35
|
+
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
36
|
+
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
|
37
|
+
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
|
38
|
+
DeclarativeAuthenticator,
|
39
|
+
NoAuth,
|
40
|
+
)
|
41
|
+
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
|
42
|
+
from airbyte_cdk.sources.declarative.auth.oauth import (
|
43
|
+
DeclarativeSingleUseRefreshTokenOauth2Authenticator,
|
44
|
+
)
|
16
45
|
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
|
17
46
|
from airbyte_cdk.sources.declarative.auth.token import (
|
18
47
|
ApiKeyAuthenticator,
|
@@ -20,86 +49,338 @@ from airbyte_cdk.sources.declarative.auth.token import (
|
|
20
49
|
BearerAuthenticator,
|
21
50
|
LegacySessionTokenAuthenticator,
|
22
51
|
)
|
23
|
-
from airbyte_cdk.sources.declarative.auth.token_provider import
|
52
|
+
from airbyte_cdk.sources.declarative.auth.token_provider import (
|
53
|
+
InterpolatedStringTokenProvider,
|
54
|
+
SessionTokenProvider,
|
55
|
+
TokenProvider,
|
56
|
+
)
|
24
57
|
from airbyte_cdk.sources.declarative.checks import CheckStream
|
58
|
+
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
25
59
|
from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
26
60
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
27
|
-
from airbyte_cdk.sources.declarative.decoders import
|
28
|
-
|
29
|
-
|
30
|
-
|
61
|
+
from airbyte_cdk.sources.declarative.decoders import (
|
62
|
+
Decoder,
|
63
|
+
GzipJsonDecoder,
|
64
|
+
IterableDecoder,
|
65
|
+
JsonDecoder,
|
66
|
+
JsonlDecoder,
|
67
|
+
PaginationDecoderDecorator,
|
68
|
+
XmlDecoder,
|
69
|
+
)
|
70
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
71
|
+
CompositeRawDecoder,
|
72
|
+
CsvParser,
|
73
|
+
GzipParser,
|
74
|
+
JsonLineParser,
|
75
|
+
)
|
76
|
+
from airbyte_cdk.sources.declarative.extractors import (
|
77
|
+
DpathExtractor,
|
78
|
+
RecordFilter,
|
79
|
+
RecordSelector,
|
80
|
+
ResponseToFileExtractor,
|
81
|
+
)
|
82
|
+
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
83
|
+
ClientSideIncrementalRecordFilterDecorator,
|
84
|
+
)
|
85
|
+
from airbyte_cdk.sources.declarative.incremental import (
|
86
|
+
ChildPartitionResumableFullRefreshCursor,
|
87
|
+
ConcurrentCursorFactory,
|
88
|
+
ConcurrentPerPartitionCursor,
|
89
|
+
CursorFactory,
|
90
|
+
DatetimeBasedCursor,
|
91
|
+
DeclarativeCursor,
|
92
|
+
GlobalSubstreamCursor,
|
93
|
+
PerPartitionCursor,
|
94
|
+
PerPartitionWithGlobalCursor,
|
95
|
+
ResumableFullRefreshCursor,
|
96
|
+
)
|
31
97
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
32
98
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
33
|
-
from airbyte_cdk.sources.declarative.
|
34
|
-
|
35
|
-
|
36
|
-
from airbyte_cdk.sources.declarative.models
|
37
|
-
|
38
|
-
|
39
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
40
|
-
|
41
|
-
|
42
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
43
|
-
|
44
|
-
|
45
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
46
|
-
|
47
|
-
|
48
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
49
|
-
|
50
|
-
|
51
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
52
|
-
|
53
|
-
|
54
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
55
|
-
|
56
|
-
|
57
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
58
|
-
|
99
|
+
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
|
100
|
+
LegacyToPerPartitionStateMigration,
|
101
|
+
)
|
102
|
+
from airbyte_cdk.sources.declarative.models import (
|
103
|
+
CustomStateMigration,
|
104
|
+
)
|
105
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
106
|
+
AddedFieldDefinition as AddedFieldDefinitionModel,
|
107
|
+
)
|
108
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
109
|
+
AddFields as AddFieldsModel,
|
110
|
+
)
|
111
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
112
|
+
ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
|
113
|
+
)
|
114
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
115
|
+
AsyncJobStatusMap as AsyncJobStatusMapModel,
|
116
|
+
)
|
117
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
118
|
+
AsyncRetriever as AsyncRetrieverModel,
|
119
|
+
)
|
120
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
121
|
+
BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
|
122
|
+
)
|
123
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
124
|
+
BearerAuthenticator as BearerAuthenticatorModel,
|
125
|
+
)
|
126
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
127
|
+
CheckStream as CheckStreamModel,
|
128
|
+
)
|
129
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
130
|
+
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
131
|
+
)
|
132
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
133
|
+
CompositeErrorHandler as CompositeErrorHandlerModel,
|
134
|
+
)
|
135
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
136
|
+
CompositeRawDecoder as CompositeRawDecoderModel,
|
137
|
+
)
|
138
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
139
|
+
ConcurrencyLevel as ConcurrencyLevelModel,
|
140
|
+
)
|
141
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
142
|
+
ConfigComponentsResolver as ConfigComponentsResolverModel,
|
143
|
+
)
|
144
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
145
|
+
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
146
|
+
)
|
147
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
148
|
+
CsvParser as CsvParserModel,
|
149
|
+
)
|
150
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
151
|
+
CursorPagination as CursorPaginationModel,
|
152
|
+
)
|
153
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
154
|
+
CustomAuthenticator as CustomAuthenticatorModel,
|
155
|
+
)
|
156
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
157
|
+
CustomBackoffStrategy as CustomBackoffStrategyModel,
|
158
|
+
)
|
159
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
160
|
+
CustomDecoder as CustomDecoderModel,
|
161
|
+
)
|
162
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
163
|
+
CustomErrorHandler as CustomErrorHandlerModel,
|
164
|
+
)
|
165
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
166
|
+
CustomIncrementalSync as CustomIncrementalSyncModel,
|
167
|
+
)
|
168
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
169
|
+
CustomPaginationStrategy as CustomPaginationStrategyModel,
|
170
|
+
)
|
171
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
172
|
+
CustomPartitionRouter as CustomPartitionRouterModel,
|
173
|
+
)
|
174
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
175
|
+
CustomRecordExtractor as CustomRecordExtractorModel,
|
176
|
+
)
|
177
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
178
|
+
CustomRecordFilter as CustomRecordFilterModel,
|
179
|
+
)
|
180
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
181
|
+
CustomRequester as CustomRequesterModel,
|
182
|
+
)
|
183
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
184
|
+
CustomRetriever as CustomRetrieverModel,
|
185
|
+
)
|
186
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
187
|
+
CustomSchemaLoader as CustomSchemaLoader,
|
188
|
+
)
|
189
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
190
|
+
CustomSchemaNormalization as CustomSchemaNormalizationModel,
|
191
|
+
)
|
192
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
193
|
+
CustomTransformation as CustomTransformationModel,
|
194
|
+
)
|
195
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
196
|
+
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
197
|
+
)
|
198
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
199
|
+
DeclarativeStream as DeclarativeStreamModel,
|
200
|
+
)
|
201
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
202
|
+
DefaultErrorHandler as DefaultErrorHandlerModel,
|
203
|
+
)
|
204
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
205
|
+
DefaultPaginator as DefaultPaginatorModel,
|
206
|
+
)
|
207
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
208
|
+
DpathExtractor as DpathExtractorModel,
|
209
|
+
)
|
210
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
211
|
+
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
212
|
+
)
|
59
213
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
60
214
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
61
215
|
)
|
62
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
63
|
-
|
64
|
-
|
65
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
66
|
-
|
216
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
217
|
+
FlattenFields as FlattenFieldsModel,
|
218
|
+
)
|
219
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
220
|
+
GzipJsonDecoder as GzipJsonDecoderModel,
|
221
|
+
)
|
222
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
223
|
+
GzipParser as GzipParserModel,
|
224
|
+
)
|
225
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
226
|
+
HttpComponentsResolver as HttpComponentsResolverModel,
|
227
|
+
)
|
228
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
229
|
+
HttpRequester as HttpRequesterModel,
|
230
|
+
)
|
231
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
232
|
+
HttpResponseFilter as HttpResponseFilterModel,
|
233
|
+
)
|
234
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
235
|
+
InlineSchemaLoader as InlineSchemaLoaderModel,
|
236
|
+
)
|
237
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
238
|
+
IterableDecoder as IterableDecoderModel,
|
239
|
+
)
|
240
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
241
|
+
JsonDecoder as JsonDecoderModel,
|
242
|
+
)
|
243
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
244
|
+
JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
|
245
|
+
)
|
246
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
247
|
+
JsonlDecoder as JsonlDecoderModel,
|
248
|
+
)
|
249
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
250
|
+
JsonLineParser as JsonLineParserModel,
|
251
|
+
)
|
252
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
253
|
+
JwtAuthenticator as JwtAuthenticatorModel,
|
254
|
+
)
|
255
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
256
|
+
JwtHeaders as JwtHeadersModel,
|
257
|
+
)
|
258
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
259
|
+
JwtPayload as JwtPayloadModel,
|
260
|
+
)
|
261
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
262
|
+
KeysReplace as KeysReplaceModel,
|
263
|
+
)
|
264
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
265
|
+
KeysToLower as KeysToLowerModel,
|
266
|
+
)
|
267
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
268
|
+
KeysToSnakeCase as KeysToSnakeCaseModel,
|
269
|
+
)
|
67
270
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
68
271
|
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
69
272
|
)
|
70
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
71
|
-
|
72
|
-
|
73
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
74
|
-
|
75
|
-
|
76
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
77
|
-
|
78
|
-
|
79
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
80
|
-
|
81
|
-
|
82
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
83
|
-
|
84
|
-
|
85
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
273
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
274
|
+
LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
|
275
|
+
)
|
276
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
277
|
+
ListPartitionRouter as ListPartitionRouterModel,
|
278
|
+
)
|
279
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
280
|
+
MinMaxDatetime as MinMaxDatetimeModel,
|
281
|
+
)
|
282
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
283
|
+
NoAuth as NoAuthModel,
|
284
|
+
)
|
285
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
286
|
+
NoPagination as NoPaginationModel,
|
287
|
+
)
|
288
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
289
|
+
OAuthAuthenticator as OAuthAuthenticatorModel,
|
290
|
+
)
|
291
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
292
|
+
OffsetIncrement as OffsetIncrementModel,
|
293
|
+
)
|
294
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
295
|
+
PageIncrement as PageIncrementModel,
|
296
|
+
)
|
297
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
298
|
+
ParentStreamConfig as ParentStreamConfigModel,
|
299
|
+
)
|
300
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
301
|
+
RecordFilter as RecordFilterModel,
|
302
|
+
)
|
303
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
304
|
+
RecordSelector as RecordSelectorModel,
|
305
|
+
)
|
306
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
307
|
+
RemoveFields as RemoveFieldsModel,
|
308
|
+
)
|
309
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
310
|
+
RequestOption as RequestOptionModel,
|
311
|
+
)
|
312
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
313
|
+
RequestPath as RequestPathModel,
|
314
|
+
)
|
315
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
316
|
+
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
317
|
+
)
|
318
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
319
|
+
SchemaNormalization as SchemaNormalizationModel,
|
320
|
+
)
|
321
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
322
|
+
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
323
|
+
)
|
324
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
325
|
+
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
326
|
+
)
|
327
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
328
|
+
SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
|
329
|
+
)
|
330
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
331
|
+
SimpleRetriever as SimpleRetrieverModel,
|
332
|
+
)
|
86
333
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
87
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
334
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
335
|
+
StreamConfig as StreamConfigModel,
|
336
|
+
)
|
337
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
338
|
+
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
339
|
+
)
|
340
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
341
|
+
TypesMap as TypesMapModel,
|
342
|
+
)
|
88
343
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
89
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
90
|
-
|
91
|
-
|
92
|
-
from airbyte_cdk.sources.declarative.
|
344
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
345
|
+
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
346
|
+
)
|
347
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
348
|
+
WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
|
349
|
+
)
|
350
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
351
|
+
XmlDecoder as XmlDecoderModel,
|
352
|
+
)
|
353
|
+
from airbyte_cdk.sources.declarative.partition_routers import (
|
354
|
+
CartesianProductStreamSlicer,
|
355
|
+
ListPartitionRouter,
|
356
|
+
PartitionRouter,
|
357
|
+
SinglePartitionRouter,
|
358
|
+
SubstreamPartitionRouter,
|
359
|
+
)
|
360
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
361
|
+
AsyncJobPartitionRouter,
|
362
|
+
)
|
363
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
364
|
+
ParentStreamConfig,
|
365
|
+
)
|
93
366
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
|
94
|
-
from airbyte_cdk.sources.declarative.requesters.error_handlers import
|
367
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
|
368
|
+
CompositeErrorHandler,
|
369
|
+
DefaultErrorHandler,
|
370
|
+
HttpResponseFilter,
|
371
|
+
)
|
95
372
|
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
|
96
373
|
ConstantBackoffStrategy,
|
97
374
|
ExponentialBackoffStrategy,
|
98
375
|
WaitTimeFromHeaderBackoffStrategy,
|
99
376
|
WaitUntilTimeFromHeaderBackoffStrategy,
|
100
377
|
)
|
101
|
-
from airbyte_cdk.sources.declarative.requesters.
|
102
|
-
from airbyte_cdk.sources.declarative.requesters.paginators import
|
378
|
+
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
|
379
|
+
from airbyte_cdk.sources.declarative.requesters.paginators import (
|
380
|
+
DefaultPaginator,
|
381
|
+
NoPagination,
|
382
|
+
PaginatorTestReadDecorator,
|
383
|
+
)
|
103
384
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
104
385
|
CursorPaginationStrategy,
|
105
386
|
CursorStopCondition,
|
@@ -108,34 +389,87 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
|
|
108
389
|
StopConditionPaginationStrategyDecorator,
|
109
390
|
)
|
110
391
|
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
|
111
|
-
from airbyte_cdk.sources.declarative.requesters.request_options import
|
392
|
+
from airbyte_cdk.sources.declarative.requesters.request_options import (
|
393
|
+
DatetimeBasedRequestOptionsProvider,
|
394
|
+
DefaultRequestOptionsProvider,
|
395
|
+
InterpolatedRequestOptionsProvider,
|
396
|
+
RequestOptionsProvider,
|
397
|
+
)
|
112
398
|
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
113
399
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
114
|
-
from airbyte_cdk.sources.declarative.
|
115
|
-
|
400
|
+
from airbyte_cdk.sources.declarative.resolvers import (
|
401
|
+
ComponentMappingDefinition,
|
402
|
+
ConfigComponentsResolver,
|
403
|
+
HttpComponentsResolver,
|
404
|
+
StreamConfig,
|
405
|
+
)
|
406
|
+
from airbyte_cdk.sources.declarative.retrievers import (
|
407
|
+
AsyncRetriever,
|
408
|
+
SimpleRetriever,
|
409
|
+
SimpleRetrieverTestReadDecorator,
|
410
|
+
)
|
411
|
+
from airbyte_cdk.sources.declarative.schema import (
|
412
|
+
DefaultSchemaLoader,
|
413
|
+
DynamicSchemaLoader,
|
414
|
+
InlineSchemaLoader,
|
415
|
+
JsonFileSchemaLoader,
|
416
|
+
SchemaTypeIdentifier,
|
417
|
+
TypesMap,
|
418
|
+
)
|
116
419
|
from airbyte_cdk.sources.declarative.spec import Spec
|
117
|
-
from airbyte_cdk.sources.declarative.stream_slicers import
|
118
|
-
from airbyte_cdk.sources.declarative.transformations import
|
420
|
+
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
421
|
+
from airbyte_cdk.sources.declarative.transformations import (
|
422
|
+
AddFields,
|
423
|
+
RecordTransformation,
|
424
|
+
RemoveFields,
|
425
|
+
)
|
119
426
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
|
120
|
-
from airbyte_cdk.sources.declarative.
|
121
|
-
|
122
|
-
|
123
|
-
from
|
124
|
-
|
427
|
+
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
428
|
+
FlattenFields,
|
429
|
+
)
|
430
|
+
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
|
431
|
+
KeysReplaceTransformation,
|
432
|
+
)
|
433
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
434
|
+
KeysToLowerTransformation,
|
435
|
+
)
|
436
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
|
437
|
+
KeysToSnakeCaseTransformation,
|
438
|
+
)
|
439
|
+
from airbyte_cdk.sources.message import (
|
440
|
+
InMemoryMessageRepository,
|
441
|
+
LogAppenderMessageRepositoryDecorator,
|
442
|
+
MessageRepository,
|
443
|
+
NoopMessageRepository,
|
444
|
+
)
|
445
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
446
|
+
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
447
|
+
CustomFormatConcurrentStreamStateConverter,
|
448
|
+
DateTimeStreamStateConverter,
|
449
|
+
)
|
450
|
+
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
|
451
|
+
from airbyte_cdk.sources.types import Config
|
452
|
+
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
125
453
|
|
126
454
|
ComponentDefinition = Mapping[str, Any]
|
127
455
|
|
128
|
-
|
129
|
-
|
456
|
+
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
|
457
|
+
SchemaNormalizationModel.None_: TransformConfig.NoTransform,
|
458
|
+
SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
|
459
|
+
}
|
130
460
|
|
131
461
|
|
132
462
|
class ModelToComponentFactory:
|
463
|
+
EPOCH_DATETIME_FORMAT = "%s"
|
464
|
+
|
133
465
|
def __init__(
|
134
466
|
self,
|
135
467
|
limit_pages_fetched_per_slice: Optional[int] = None,
|
136
468
|
limit_slices_fetched: Optional[int] = None,
|
137
469
|
emit_connector_builder_messages: bool = False,
|
138
470
|
disable_retries: bool = False,
|
471
|
+
disable_cache: bool = False,
|
472
|
+
disable_resumable_full_refresh: bool = False,
|
139
473
|
message_repository: Optional[MessageRepository] = None,
|
140
474
|
):
|
141
475
|
self._init_mappings()
|
@@ -143,7 +477,9 @@ class ModelToComponentFactory:
|
|
143
477
|
self._limit_slices_fetched = limit_slices_fetched
|
144
478
|
self._emit_connector_builder_messages = emit_connector_builder_messages
|
145
479
|
self._disable_retries = disable_retries
|
146
|
-
self.
|
480
|
+
self._disable_cache = disable_cache
|
481
|
+
self._disable_resumable_full_refresh = disable_resumable_full_refresh
|
482
|
+
self._message_repository = message_repository or InMemoryMessageRepository(
|
147
483
|
self._evaluate_log_level(emit_connector_builder_messages)
|
148
484
|
)
|
149
485
|
|
@@ -156,10 +492,13 @@ class ModelToComponentFactory:
|
|
156
492
|
BearerAuthenticatorModel: self.create_bearer_authenticator,
|
157
493
|
CheckStreamModel: self.create_check_stream,
|
158
494
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
495
|
+
CompositeRawDecoderModel: self.create_composite_raw_decoder,
|
496
|
+
ConcurrencyLevelModel: self.create_concurrency_level,
|
159
497
|
ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
|
160
498
|
CursorPaginationModel: self.create_cursor_pagination,
|
161
499
|
CustomAuthenticatorModel: self.create_custom_component,
|
162
500
|
CustomBackoffStrategyModel: self.create_custom_component,
|
501
|
+
CustomDecoderModel: self.create_custom_component,
|
163
502
|
CustomErrorHandlerModel: self.create_custom_component,
|
164
503
|
CustomIncrementalSyncModel: self.create_custom_component,
|
165
504
|
CustomRecordExtractorModel: self.create_custom_component,
|
@@ -167,6 +506,8 @@ class ModelToComponentFactory:
|
|
167
506
|
CustomRequesterModel: self.create_custom_component,
|
168
507
|
CustomRetrieverModel: self.create_custom_component,
|
169
508
|
CustomSchemaLoader: self.create_custom_component,
|
509
|
+
CustomSchemaNormalizationModel: self.create_custom_component,
|
510
|
+
CustomStateMigration: self.create_custom_component,
|
170
511
|
CustomPaginationStrategyModel: self.create_custom_component,
|
171
512
|
CustomPartitionRouterModel: self.create_custom_component,
|
172
513
|
CustomTransformationModel: self.create_custom_component,
|
@@ -175,13 +516,29 @@ class ModelToComponentFactory:
|
|
175
516
|
DefaultErrorHandlerModel: self.create_default_error_handler,
|
176
517
|
DefaultPaginatorModel: self.create_default_paginator,
|
177
518
|
DpathExtractorModel: self.create_dpath_extractor,
|
519
|
+
ResponseToFileExtractorModel: self.create_response_to_file_extractor,
|
178
520
|
ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
|
179
521
|
SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
|
180
522
|
HttpRequesterModel: self.create_http_requester,
|
181
523
|
HttpResponseFilterModel: self.create_http_response_filter,
|
182
524
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
183
525
|
JsonDecoderModel: self.create_json_decoder,
|
526
|
+
JsonlDecoderModel: self.create_jsonl_decoder,
|
527
|
+
JsonLineParserModel: self.create_json_line_parser,
|
528
|
+
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
529
|
+
GzipParserModel: self.create_gzip_parser,
|
530
|
+
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
531
|
+
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
532
|
+
KeysReplaceModel: self.create_keys_replace_transformation,
|
533
|
+
FlattenFieldsModel: self.create_flatten_fields,
|
534
|
+
IterableDecoderModel: self.create_iterable_decoder,
|
535
|
+
XmlDecoderModel: self.create_xml_decoder,
|
184
536
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
537
|
+
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
538
|
+
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
539
|
+
TypesMapModel: self.create_types_map,
|
540
|
+
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
541
|
+
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
185
542
|
ListPartitionRouterModel: self.create_list_partition_router,
|
186
543
|
MinMaxDatetimeModel: self.create_min_max_datetime,
|
187
544
|
NoAuthModel: self.create_no_auth,
|
@@ -202,13 +559,22 @@ class ModelToComponentFactory:
|
|
202
559
|
SubstreamPartitionRouterModel: self.create_substream_partition_router,
|
203
560
|
WaitTimeFromHeaderModel: self.create_wait_time_from_header,
|
204
561
|
WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
|
562
|
+
AsyncRetrieverModel: self.create_async_retriever,
|
563
|
+
HttpComponentsResolverModel: self.create_http_components_resolver,
|
564
|
+
ConfigComponentsResolverModel: self.create_config_components_resolver,
|
565
|
+
StreamConfigModel: self.create_stream_config,
|
566
|
+
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
205
567
|
}
|
206
568
|
|
207
569
|
# Needed for the case where we need to perform a second parse on the fields of a custom component
|
208
570
|
self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
|
209
571
|
|
210
572
|
def create_component(
|
211
|
-
self,
|
573
|
+
self,
|
574
|
+
model_type: Type[BaseModel],
|
575
|
+
component_definition: ComponentDefinition,
|
576
|
+
config: Config,
|
577
|
+
**kwargs: Any,
|
212
578
|
) -> Any:
|
213
579
|
"""
|
214
580
|
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
|
@@ -223,26 +589,38 @@ class ModelToComponentFactory:
|
|
223
589
|
|
224
590
|
component_type = component_definition.get("type")
|
225
591
|
if component_definition.get("type") != model_type.__name__:
|
226
|
-
raise ValueError(
|
592
|
+
raise ValueError(
|
593
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
594
|
+
)
|
227
595
|
|
228
596
|
declarative_component_model = model_type.parse_obj(component_definition)
|
229
597
|
|
230
598
|
if not isinstance(declarative_component_model, model_type):
|
231
|
-
raise ValueError(
|
599
|
+
raise ValueError(
|
600
|
+
f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
|
601
|
+
)
|
232
602
|
|
233
|
-
return self._create_component_from_model(
|
603
|
+
return self._create_component_from_model(
|
604
|
+
model=declarative_component_model, config=config, **kwargs
|
605
|
+
)
|
234
606
|
|
235
607
|
def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
|
236
608
|
if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
|
237
|
-
raise ValueError(
|
609
|
+
raise ValueError(
|
610
|
+
f"{model.__class__} with attributes {model} is not a valid component type"
|
611
|
+
)
|
238
612
|
component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
|
239
613
|
if not component_constructor:
|
240
614
|
raise ValueError(f"Could not find constructor for {model.__class__}")
|
241
615
|
return component_constructor(model=model, config=config, **kwargs)
|
242
616
|
|
243
617
|
@staticmethod
|
244
|
-
def create_added_field_definition(
|
245
|
-
|
618
|
+
def create_added_field_definition(
|
619
|
+
model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
|
620
|
+
) -> AddedFieldDefinition:
|
621
|
+
interpolated_value = InterpolatedString.create(
|
622
|
+
model.value, parameters=model.parameters or {}
|
623
|
+
)
|
246
624
|
return AddedFieldDefinition(
|
247
625
|
path=model.path,
|
248
626
|
value=interpolated_value,
|
@@ -254,13 +632,39 @@ class ModelToComponentFactory:
|
|
254
632
|
added_field_definitions = [
|
255
633
|
self._create_component_from_model(
|
256
634
|
model=added_field_definition_model,
|
257
|
-
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
635
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
636
|
+
added_field_definition_model.value_type
|
637
|
+
),
|
258
638
|
config=config,
|
259
639
|
)
|
260
640
|
for added_field_definition_model in model.fields
|
261
641
|
]
|
262
642
|
return AddFields(fields=added_field_definitions, parameters=model.parameters or {})
|
263
643
|
|
644
|
+
def create_keys_to_lower_transformation(
|
645
|
+
self, model: KeysToLowerModel, config: Config, **kwargs: Any
|
646
|
+
) -> KeysToLowerTransformation:
|
647
|
+
return KeysToLowerTransformation()
|
648
|
+
|
649
|
+
def create_keys_to_snake_transformation(
|
650
|
+
self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
|
651
|
+
) -> KeysToSnakeCaseTransformation:
|
652
|
+
return KeysToSnakeCaseTransformation()
|
653
|
+
|
654
|
+
def create_keys_replace_transformation(
|
655
|
+
self, model: KeysReplaceModel, config: Config, **kwargs: Any
|
656
|
+
) -> KeysReplaceTransformation:
|
657
|
+
return KeysReplaceTransformation(
|
658
|
+
old=model.old, new=model.new, parameters=model.parameters or {}
|
659
|
+
)
|
660
|
+
|
661
|
+
def create_flatten_fields(
|
662
|
+
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
|
663
|
+
) -> FlattenFields:
|
664
|
+
return FlattenFields(
|
665
|
+
flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
|
666
|
+
)
|
667
|
+
|
264
668
|
@staticmethod
|
265
669
|
def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
|
266
670
|
if not value_type:
|
@@ -275,16 +679,25 @@ class ModelToComponentFactory:
|
|
275
679
|
|
276
680
|
@staticmethod
|
277
681
|
def create_api_key_authenticator(
|
278
|
-
model: ApiKeyAuthenticatorModel,
|
682
|
+
model: ApiKeyAuthenticatorModel,
|
683
|
+
config: Config,
|
684
|
+
token_provider: Optional[TokenProvider] = None,
|
685
|
+
**kwargs: Any,
|
279
686
|
) -> ApiKeyAuthenticator:
|
280
687
|
if model.inject_into is None and model.header is None:
|
281
|
-
raise ValueError(
|
688
|
+
raise ValueError(
|
689
|
+
"Expected either inject_into or header to be set for ApiKeyAuthenticator"
|
690
|
+
)
|
282
691
|
|
283
692
|
if model.inject_into is not None and model.header is not None:
|
284
|
-
raise ValueError(
|
693
|
+
raise ValueError(
|
694
|
+
"inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
|
695
|
+
)
|
285
696
|
|
286
697
|
if token_provider is not None and model.api_token != "":
|
287
|
-
raise ValueError(
|
698
|
+
raise ValueError(
|
699
|
+
"If token_provider is set, api_token is ignored and has to be set to empty string."
|
700
|
+
)
|
288
701
|
|
289
702
|
request_option = (
|
290
703
|
RequestOption(
|
@@ -300,54 +713,128 @@ class ModelToComponentFactory:
|
|
300
713
|
)
|
301
714
|
)
|
302
715
|
return ApiKeyAuthenticator(
|
303
|
-
token_provider=
|
304
|
-
|
305
|
-
|
716
|
+
token_provider=(
|
717
|
+
token_provider
|
718
|
+
if token_provider is not None
|
719
|
+
else InterpolatedStringTokenProvider(
|
720
|
+
api_token=model.api_token or "",
|
721
|
+
config=config,
|
722
|
+
parameters=model.parameters or {},
|
723
|
+
)
|
724
|
+
),
|
306
725
|
request_option=request_option,
|
307
726
|
config=config,
|
308
727
|
parameters=model.parameters or {},
|
309
728
|
)
|
310
729
|
|
730
|
+
def create_legacy_to_per_partition_state_migration(
|
731
|
+
self,
|
732
|
+
model: LegacyToPerPartitionStateMigrationModel,
|
733
|
+
config: Mapping[str, Any],
|
734
|
+
declarative_stream: DeclarativeStreamModel,
|
735
|
+
) -> LegacyToPerPartitionStateMigration:
|
736
|
+
retriever = declarative_stream.retriever
|
737
|
+
if not isinstance(retriever, SimpleRetrieverModel):
|
738
|
+
raise ValueError(
|
739
|
+
f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
|
740
|
+
)
|
741
|
+
partition_router = retriever.partition_router
|
742
|
+
if not isinstance(
|
743
|
+
partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
|
744
|
+
):
|
745
|
+
raise ValueError(
|
746
|
+
f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
|
747
|
+
)
|
748
|
+
if not hasattr(partition_router, "parent_stream_configs"):
|
749
|
+
raise ValueError(
|
750
|
+
"LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
|
751
|
+
)
|
752
|
+
|
753
|
+
if not hasattr(declarative_stream, "incremental_sync"):
|
754
|
+
raise ValueError(
|
755
|
+
"LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
|
756
|
+
)
|
757
|
+
|
758
|
+
return LegacyToPerPartitionStateMigration(
|
759
|
+
partition_router, # type: ignore # was already checked above
|
760
|
+
declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams.
|
761
|
+
config,
|
762
|
+
declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
|
763
|
+
)
|
764
|
+
|
311
765
|
def create_session_token_authenticator(
|
312
766
|
self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
|
313
767
|
) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
|
314
|
-
|
768
|
+
decoder = (
|
769
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
770
|
+
if model.decoder
|
771
|
+
else JsonDecoder(parameters={})
|
772
|
+
)
|
773
|
+
login_requester = self._create_component_from_model(
|
774
|
+
model=model.login_requester,
|
775
|
+
config=config,
|
776
|
+
name=f"{name}_login_requester",
|
777
|
+
decoder=decoder,
|
778
|
+
)
|
315
779
|
token_provider = SessionTokenProvider(
|
316
780
|
login_requester=login_requester,
|
317
781
|
session_token_path=model.session_token_path,
|
318
|
-
expiration_duration=parse_duration(model.expiration_duration)
|
782
|
+
expiration_duration=parse_duration(model.expiration_duration)
|
783
|
+
if model.expiration_duration
|
784
|
+
else None,
|
319
785
|
parameters=model.parameters or {},
|
320
786
|
message_repository=self._message_repository,
|
787
|
+
decoder=decoder,
|
321
788
|
)
|
322
789
|
if model.request_authentication.type == "Bearer":
|
323
790
|
return ModelToComponentFactory.create_bearer_authenticator(
|
324
791
|
BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value
|
325
792
|
config,
|
326
|
-
token_provider=token_provider,
|
793
|
+
token_provider=token_provider,
|
327
794
|
)
|
328
795
|
else:
|
329
796
|
return ModelToComponentFactory.create_api_key_authenticator(
|
330
|
-
ApiKeyAuthenticatorModel(
|
797
|
+
ApiKeyAuthenticatorModel(
|
798
|
+
type="ApiKeyAuthenticator",
|
799
|
+
api_token="",
|
800
|
+
inject_into=model.request_authentication.inject_into,
|
801
|
+
), # type: ignore # $parameters and headers default to None
|
331
802
|
config=config,
|
332
803
|
token_provider=token_provider,
|
333
804
|
)
|
334
805
|
|
335
806
|
@staticmethod
|
336
|
-
def create_basic_http_authenticator(
|
807
|
+
def create_basic_http_authenticator(
|
808
|
+
model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
|
809
|
+
) -> BasicHttpAuthenticator:
|
337
810
|
return BasicHttpAuthenticator(
|
338
|
-
password=model.password or "",
|
811
|
+
password=model.password or "",
|
812
|
+
username=model.username,
|
813
|
+
config=config,
|
814
|
+
parameters=model.parameters or {},
|
339
815
|
)
|
340
816
|
|
341
817
|
@staticmethod
|
342
818
|
def create_bearer_authenticator(
|
343
|
-
model: BearerAuthenticatorModel,
|
819
|
+
model: BearerAuthenticatorModel,
|
820
|
+
config: Config,
|
821
|
+
token_provider: Optional[TokenProvider] = None,
|
822
|
+
**kwargs: Any,
|
344
823
|
) -> BearerAuthenticator:
|
345
824
|
if token_provider is not None and model.api_token != "":
|
346
|
-
raise ValueError(
|
825
|
+
raise ValueError(
|
826
|
+
"If token_provider is set, api_token is ignored and has to be set to empty string."
|
827
|
+
)
|
347
828
|
return BearerAuthenticator(
|
348
|
-
token_provider=
|
349
|
-
|
350
|
-
|
829
|
+
token_provider=(
|
830
|
+
token_provider
|
831
|
+
if token_provider is not None
|
832
|
+
else InterpolatedStringTokenProvider(
|
833
|
+
api_token=model.api_token or "",
|
834
|
+
config=config,
|
835
|
+
parameters=model.parameters or {},
|
836
|
+
)
|
837
|
+
),
|
351
838
|
config=config,
|
352
839
|
parameters=model.parameters or {},
|
353
840
|
)
|
@@ -356,29 +843,277 @@ class ModelToComponentFactory:
|
|
356
843
|
def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
|
357
844
|
return CheckStream(stream_names=model.stream_names, parameters={})
|
358
845
|
|
359
|
-
def create_composite_error_handler(
|
846
|
+
def create_composite_error_handler(
|
847
|
+
self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
|
848
|
+
) -> CompositeErrorHandler:
|
360
849
|
error_handlers = [
|
361
|
-
self._create_component_from_model(model=error_handler_model, config=config)
|
850
|
+
self._create_component_from_model(model=error_handler_model, config=config)
|
851
|
+
for error_handler_model in model.error_handlers
|
362
852
|
]
|
363
|
-
return CompositeErrorHandler(
|
853
|
+
return CompositeErrorHandler(
|
854
|
+
error_handlers=error_handlers, parameters=model.parameters or {}
|
855
|
+
)
|
856
|
+
|
857
|
+
@staticmethod
|
858
|
+
def create_concurrency_level(
|
859
|
+
model: ConcurrencyLevelModel, config: Config, **kwargs: Any
|
860
|
+
) -> ConcurrencyLevel:
|
861
|
+
return ConcurrencyLevel(
|
862
|
+
default_concurrency=model.default_concurrency,
|
863
|
+
max_concurrency=model.max_concurrency,
|
864
|
+
config=config,
|
865
|
+
parameters={},
|
866
|
+
)
|
867
|
+
|
868
|
+
def create_concurrent_cursor_from_datetime_based_cursor(
|
869
|
+
self,
|
870
|
+
state_manager: ConnectorStateManager,
|
871
|
+
model_type: Type[BaseModel],
|
872
|
+
component_definition: ComponentDefinition,
|
873
|
+
stream_name: str,
|
874
|
+
stream_namespace: Optional[str],
|
875
|
+
config: Config,
|
876
|
+
stream_state: MutableMapping[str, Any],
|
877
|
+
message_repository: Optional[MessageRepository] = None,
|
878
|
+
runtime_lookback_window: Optional[int] = None,
|
879
|
+
**kwargs: Any,
|
880
|
+
) -> ConcurrentCursor:
|
881
|
+
component_type = component_definition.get("type")
|
882
|
+
if component_definition.get("type") != model_type.__name__:
|
883
|
+
raise ValueError(
|
884
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
885
|
+
)
|
886
|
+
|
887
|
+
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
888
|
+
|
889
|
+
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
890
|
+
raise ValueError(
|
891
|
+
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
892
|
+
)
|
893
|
+
|
894
|
+
interpolated_cursor_field = InterpolatedString.create(
|
895
|
+
datetime_based_cursor_model.cursor_field,
|
896
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
897
|
+
)
|
898
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
899
|
+
|
900
|
+
interpolated_partition_field_start = InterpolatedString.create(
|
901
|
+
datetime_based_cursor_model.partition_field_start or "start_time",
|
902
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
903
|
+
)
|
904
|
+
interpolated_partition_field_end = InterpolatedString.create(
|
905
|
+
datetime_based_cursor_model.partition_field_end or "end_time",
|
906
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
907
|
+
)
|
908
|
+
|
909
|
+
slice_boundary_fields = (
|
910
|
+
interpolated_partition_field_start.eval(config=config),
|
911
|
+
interpolated_partition_field_end.eval(config=config),
|
912
|
+
)
|
913
|
+
|
914
|
+
datetime_format = datetime_based_cursor_model.datetime_format
|
915
|
+
|
916
|
+
cursor_granularity = (
|
917
|
+
parse_duration(datetime_based_cursor_model.cursor_granularity)
|
918
|
+
if datetime_based_cursor_model.cursor_granularity
|
919
|
+
else None
|
920
|
+
)
|
921
|
+
|
922
|
+
lookback_window = None
|
923
|
+
interpolated_lookback_window = (
|
924
|
+
InterpolatedString.create(
|
925
|
+
datetime_based_cursor_model.lookback_window,
|
926
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
927
|
+
)
|
928
|
+
if datetime_based_cursor_model.lookback_window
|
929
|
+
else None
|
930
|
+
)
|
931
|
+
if interpolated_lookback_window:
|
932
|
+
evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
|
933
|
+
if evaluated_lookback_window:
|
934
|
+
lookback_window = parse_duration(evaluated_lookback_window)
|
935
|
+
|
936
|
+
if runtime_lookback_window and lookback_window:
|
937
|
+
lookback_window = max(lookback_window, runtime_lookback_window)
|
938
|
+
elif runtime_lookback_window:
|
939
|
+
lookback_window = runtime_lookback_window
|
940
|
+
|
941
|
+
connector_state_converter: DateTimeStreamStateConverter
|
942
|
+
connector_state_converter = CustomFormatConcurrentStreamStateConverter(
|
943
|
+
datetime_format=datetime_format,
|
944
|
+
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
|
945
|
+
is_sequential_state=True,
|
946
|
+
cursor_granularity=cursor_granularity,
|
947
|
+
)
|
948
|
+
|
949
|
+
start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
|
950
|
+
if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
|
951
|
+
start_date_runtime_value = self.create_min_max_datetime(
|
952
|
+
model=datetime_based_cursor_model.start_datetime, config=config
|
953
|
+
)
|
954
|
+
else:
|
955
|
+
start_date_runtime_value = datetime_based_cursor_model.start_datetime
|
956
|
+
|
957
|
+
end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
|
958
|
+
if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
|
959
|
+
end_date_runtime_value = self.create_min_max_datetime(
|
960
|
+
model=datetime_based_cursor_model.end_datetime, config=config
|
961
|
+
)
|
962
|
+
else:
|
963
|
+
end_date_runtime_value = datetime_based_cursor_model.end_datetime
|
964
|
+
|
965
|
+
interpolated_start_date = MinMaxDatetime.create(
|
966
|
+
interpolated_string_or_min_max_datetime=start_date_runtime_value,
|
967
|
+
parameters=datetime_based_cursor_model.parameters,
|
968
|
+
)
|
969
|
+
interpolated_end_date = (
|
970
|
+
None
|
971
|
+
if not end_date_runtime_value
|
972
|
+
else MinMaxDatetime.create(
|
973
|
+
end_date_runtime_value, datetime_based_cursor_model.parameters
|
974
|
+
)
|
975
|
+
)
|
976
|
+
|
977
|
+
# If datetime format is not specified then start/end datetime should inherit it from the stream slicer
|
978
|
+
if not interpolated_start_date.datetime_format:
|
979
|
+
interpolated_start_date.datetime_format = datetime_format
|
980
|
+
if interpolated_end_date and not interpolated_end_date.datetime_format:
|
981
|
+
interpolated_end_date.datetime_format = datetime_format
|
982
|
+
|
983
|
+
start_date = interpolated_start_date.get_datetime(config=config)
|
984
|
+
end_date_provider = (
|
985
|
+
partial(interpolated_end_date.get_datetime, config)
|
986
|
+
if interpolated_end_date
|
987
|
+
else connector_state_converter.get_end_provider()
|
988
|
+
)
|
989
|
+
|
990
|
+
if (
|
991
|
+
datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
|
992
|
+
) or (
|
993
|
+
not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
|
994
|
+
):
|
995
|
+
raise ValueError(
|
996
|
+
f"If step is defined, cursor_granularity should be as well and vice-versa. "
|
997
|
+
f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
|
998
|
+
)
|
999
|
+
|
1000
|
+
# When step is not defined, default to a step size from the starting date to the present moment
|
1001
|
+
step_length = datetime.timedelta.max
|
1002
|
+
interpolated_step = (
|
1003
|
+
InterpolatedString.create(
|
1004
|
+
datetime_based_cursor_model.step,
|
1005
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
1006
|
+
)
|
1007
|
+
if datetime_based_cursor_model.step
|
1008
|
+
else None
|
1009
|
+
)
|
1010
|
+
if interpolated_step:
|
1011
|
+
evaluated_step = interpolated_step.eval(config)
|
1012
|
+
if evaluated_step:
|
1013
|
+
step_length = parse_duration(evaluated_step)
|
1014
|
+
|
1015
|
+
return ConcurrentCursor(
|
1016
|
+
stream_name=stream_name,
|
1017
|
+
stream_namespace=stream_namespace,
|
1018
|
+
stream_state=stream_state,
|
1019
|
+
message_repository=message_repository or self._message_repository,
|
1020
|
+
connector_state_manager=state_manager,
|
1021
|
+
connector_state_converter=connector_state_converter,
|
1022
|
+
cursor_field=cursor_field,
|
1023
|
+
slice_boundary_fields=slice_boundary_fields,
|
1024
|
+
start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1025
|
+
end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
1026
|
+
lookback_window=lookback_window,
|
1027
|
+
slice_range=step_length,
|
1028
|
+
cursor_granularity=cursor_granularity,
|
1029
|
+
)
|
1030
|
+
|
1031
|
+
def create_concurrent_cursor_from_perpartition_cursor(
|
1032
|
+
self,
|
1033
|
+
state_manager: ConnectorStateManager,
|
1034
|
+
model_type: Type[BaseModel],
|
1035
|
+
component_definition: ComponentDefinition,
|
1036
|
+
stream_name: str,
|
1037
|
+
stream_namespace: Optional[str],
|
1038
|
+
config: Config,
|
1039
|
+
stream_state: MutableMapping[str, Any],
|
1040
|
+
partition_router,
|
1041
|
+
**kwargs: Any,
|
1042
|
+
) -> ConcurrentPerPartitionCursor:
|
1043
|
+
component_type = component_definition.get("type")
|
1044
|
+
if component_definition.get("type") != model_type.__name__:
|
1045
|
+
raise ValueError(
|
1046
|
+
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
1047
|
+
)
|
1048
|
+
|
1049
|
+
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
1050
|
+
|
1051
|
+
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
1052
|
+
raise ValueError(
|
1053
|
+
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
1054
|
+
)
|
1055
|
+
|
1056
|
+
interpolated_cursor_field = InterpolatedString.create(
|
1057
|
+
datetime_based_cursor_model.cursor_field,
|
1058
|
+
parameters=datetime_based_cursor_model.parameters or {},
|
1059
|
+
)
|
1060
|
+
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
1061
|
+
|
1062
|
+
# Create the cursor factory
|
1063
|
+
cursor_factory = ConcurrentCursorFactory(
|
1064
|
+
partial(
|
1065
|
+
self.create_concurrent_cursor_from_datetime_based_cursor,
|
1066
|
+
state_manager=state_manager,
|
1067
|
+
model_type=model_type,
|
1068
|
+
component_definition=component_definition,
|
1069
|
+
stream_name=stream_name,
|
1070
|
+
stream_namespace=stream_namespace,
|
1071
|
+
config=config,
|
1072
|
+
message_repository=NoopMessageRepository(),
|
1073
|
+
)
|
1074
|
+
)
|
1075
|
+
|
1076
|
+
# Return the concurrent cursor and state converter
|
1077
|
+
return ConcurrentPerPartitionCursor(
|
1078
|
+
cursor_factory=cursor_factory,
|
1079
|
+
partition_router=partition_router,
|
1080
|
+
stream_name=stream_name,
|
1081
|
+
stream_namespace=stream_namespace,
|
1082
|
+
stream_state=stream_state,
|
1083
|
+
message_repository=self._message_repository, # type: ignore
|
1084
|
+
connector_state_manager=state_manager,
|
1085
|
+
cursor_field=cursor_field,
|
1086
|
+
)
|
364
1087
|
|
365
1088
|
@staticmethod
|
366
|
-
def create_constant_backoff_strategy(
|
1089
|
+
def create_constant_backoff_strategy(
|
1090
|
+
model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
|
1091
|
+
) -> ConstantBackoffStrategy:
|
367
1092
|
return ConstantBackoffStrategy(
|
368
1093
|
backoff_time_in_seconds=model.backoff_time_in_seconds,
|
369
1094
|
config=config,
|
370
1095
|
parameters=model.parameters or {},
|
371
1096
|
)
|
372
1097
|
|
373
|
-
def create_cursor_pagination(
|
374
|
-
|
375
|
-
|
1098
|
+
def create_cursor_pagination(
|
1099
|
+
self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
|
1100
|
+
) -> CursorPaginationStrategy:
|
1101
|
+
if isinstance(decoder, PaginationDecoderDecorator):
|
1102
|
+
if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
|
1103
|
+
raise ValueError(
|
1104
|
+
f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
1105
|
+
)
|
1106
|
+
decoder_to_use = decoder
|
376
1107
|
else:
|
377
|
-
|
1108
|
+
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
1109
|
+
raise ValueError(
|
1110
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
1111
|
+
)
|
1112
|
+
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
378
1113
|
|
379
1114
|
return CursorPaginationStrategy(
|
380
1115
|
cursor_value=model.cursor_value,
|
381
|
-
decoder=
|
1116
|
+
decoder=decoder_to_use,
|
382
1117
|
page_size=model.page_size,
|
383
1118
|
stop_condition=model.stop_condition,
|
384
1119
|
config=config,
|
@@ -409,18 +1144,28 @@ class ModelToComponentFactory:
|
|
409
1144
|
# the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
|
410
1145
|
for model_field, model_value in model_args.items():
|
411
1146
|
# If a custom component field doesn't have a type set, we try to use the type hints to infer the type
|
412
|
-
if
|
413
|
-
|
1147
|
+
if (
|
1148
|
+
isinstance(model_value, dict)
|
1149
|
+
and "type" not in model_value
|
1150
|
+
and model_field in component_fields
|
1151
|
+
):
|
1152
|
+
derived_type = self._derive_component_type_from_type_hints(
|
1153
|
+
component_fields.get(model_field)
|
1154
|
+
)
|
414
1155
|
if derived_type:
|
415
1156
|
model_value["type"] = derived_type
|
416
1157
|
|
417
1158
|
if self._is_component(model_value):
|
418
|
-
model_args[model_field] = self._create_nested_component(
|
1159
|
+
model_args[model_field] = self._create_nested_component(
|
1160
|
+
model, model_field, model_value, config
|
1161
|
+
)
|
419
1162
|
elif isinstance(model_value, list):
|
420
1163
|
vals = []
|
421
1164
|
for v in model_value:
|
422
1165
|
if isinstance(v, dict) and "type" not in v and model_field in component_fields:
|
423
|
-
derived_type = self._derive_component_type_from_type_hints(
|
1166
|
+
derived_type = self._derive_component_type_from_type_hints(
|
1167
|
+
component_fields.get(model_field)
|
1168
|
+
)
|
424
1169
|
if derived_type:
|
425
1170
|
v["type"] = derived_type
|
426
1171
|
if self._is_component(v):
|
@@ -429,7 +1174,11 @@ class ModelToComponentFactory:
|
|
429
1174
|
vals.append(v)
|
430
1175
|
model_args[model_field] = vals
|
431
1176
|
|
432
|
-
kwargs = {
|
1177
|
+
kwargs = {
|
1178
|
+
class_field: model_args[class_field]
|
1179
|
+
for class_field in component_fields.keys()
|
1180
|
+
if class_field in model_args
|
1181
|
+
}
|
433
1182
|
return custom_component_class(**kwargs)
|
434
1183
|
|
435
1184
|
@staticmethod
|
@@ -473,7 +1222,9 @@ class ModelToComponentFactory:
|
|
473
1222
|
else:
|
474
1223
|
return []
|
475
1224
|
|
476
|
-
def _create_nested_component(
|
1225
|
+
def _create_nested_component(
|
1226
|
+
self, model: Any, model_field: str, model_value: Any, config: Config
|
1227
|
+
) -> Any:
|
477
1228
|
type_name = model_value.get("type", None)
|
478
1229
|
if not type_name:
|
479
1230
|
# If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
|
@@ -492,16 +1243,29 @@ class ModelToComponentFactory:
|
|
492
1243
|
model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
|
493
1244
|
constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
|
494
1245
|
model_parameters = model_value.get("$parameters", {})
|
495
|
-
matching_parameters = {
|
496
|
-
|
1246
|
+
matching_parameters = {
|
1247
|
+
kwarg: model_parameters[kwarg]
|
1248
|
+
for kwarg in constructor_kwargs
|
1249
|
+
if kwarg in model_parameters
|
1250
|
+
}
|
1251
|
+
return self._create_component_from_model(
|
1252
|
+
model=parsed_model, config=config, **matching_parameters
|
1253
|
+
)
|
497
1254
|
except TypeError as error:
|
498
1255
|
missing_parameters = self._extract_missing_parameters(error)
|
499
1256
|
if missing_parameters:
|
500
1257
|
raise ValueError(
|
501
1258
|
f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
|
502
|
-
+ ", ".join(
|
1259
|
+
+ ", ".join(
|
1260
|
+
(
|
1261
|
+
f"{type_name}.$parameters.{parameter}"
|
1262
|
+
for parameter in missing_parameters
|
1263
|
+
)
|
1264
|
+
)
|
503
1265
|
)
|
504
|
-
raise TypeError(
|
1266
|
+
raise TypeError(
|
1267
|
+
f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
|
1268
|
+
)
|
505
1269
|
else:
|
506
1270
|
raise ValueError(
|
507
1271
|
f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
|
@@ -511,16 +1275,26 @@ class ModelToComponentFactory:
|
|
511
1275
|
def _is_component(model_value: Any) -> bool:
|
512
1276
|
return isinstance(model_value, dict) and model_value.get("type") is not None
|
513
1277
|
|
514
|
-
def create_datetime_based_cursor(
|
1278
|
+
def create_datetime_based_cursor(
|
1279
|
+
self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
|
1280
|
+
) -> DatetimeBasedCursor:
|
515
1281
|
start_datetime: Union[str, MinMaxDatetime] = (
|
516
|
-
model.start_datetime
|
1282
|
+
model.start_datetime
|
1283
|
+
if isinstance(model.start_datetime, str)
|
1284
|
+
else self.create_min_max_datetime(model.start_datetime, config)
|
517
1285
|
)
|
518
1286
|
end_datetime: Union[str, MinMaxDatetime, None] = None
|
519
1287
|
if model.is_data_feed and model.end_datetime:
|
520
1288
|
raise ValueError("Data feed does not support end_datetime")
|
1289
|
+
if model.is_data_feed and model.is_client_side_incremental:
|
1290
|
+
raise ValueError(
|
1291
|
+
"`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
|
1292
|
+
)
|
521
1293
|
if model.end_datetime:
|
522
1294
|
end_datetime = (
|
523
|
-
model.end_datetime
|
1295
|
+
model.end_datetime
|
1296
|
+
if isinstance(model.end_datetime, str)
|
1297
|
+
else self.create_min_max_datetime(model.end_datetime, config)
|
524
1298
|
)
|
525
1299
|
|
526
1300
|
end_time_option = (
|
@@ -544,7 +1318,9 @@ class ModelToComponentFactory:
|
|
544
1318
|
|
545
1319
|
return DatetimeBasedCursor(
|
546
1320
|
cursor_field=model.cursor_field,
|
547
|
-
cursor_datetime_formats=model.cursor_datetime_formats
|
1321
|
+
cursor_datetime_formats=model.cursor_datetime_formats
|
1322
|
+
if model.cursor_datetime_formats
|
1323
|
+
else [],
|
548
1324
|
cursor_granularity=model.cursor_granularity,
|
549
1325
|
datetime_format=model.datetime_format,
|
550
1326
|
end_datetime=end_datetime,
|
@@ -556,11 +1332,14 @@ class ModelToComponentFactory:
|
|
556
1332
|
partition_field_end=model.partition_field_end,
|
557
1333
|
partition_field_start=model.partition_field_start,
|
558
1334
|
message_repository=self._message_repository,
|
1335
|
+
is_compare_strictly=model.is_compare_strictly,
|
559
1336
|
config=config,
|
560
1337
|
parameters=model.parameters or {},
|
561
1338
|
)
|
562
1339
|
|
563
|
-
def create_declarative_stream(
|
1340
|
+
def create_declarative_stream(
|
1341
|
+
self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
|
1342
|
+
) -> DeclarativeStream:
|
564
1343
|
# When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
|
565
1344
|
# components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
|
566
1345
|
# Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
|
@@ -569,25 +1348,99 @@ class ModelToComponentFactory:
|
|
569
1348
|
|
570
1349
|
primary_key = model.primary_key.__root__ if model.primary_key else None
|
571
1350
|
stop_condition_on_cursor = (
|
572
|
-
model.incremental_sync
|
1351
|
+
model.incremental_sync
|
1352
|
+
and hasattr(model.incremental_sync, "is_data_feed")
|
1353
|
+
and model.incremental_sync.is_data_feed
|
573
1354
|
)
|
1355
|
+
client_side_incremental_sync = None
|
1356
|
+
if (
|
1357
|
+
model.incremental_sync
|
1358
|
+
and hasattr(model.incremental_sync, "is_client_side_incremental")
|
1359
|
+
and model.incremental_sync.is_client_side_incremental
|
1360
|
+
):
|
1361
|
+
supported_slicers = (
|
1362
|
+
DatetimeBasedCursor,
|
1363
|
+
GlobalSubstreamCursor,
|
1364
|
+
PerPartitionWithGlobalCursor,
|
1365
|
+
)
|
1366
|
+
if combined_slicers and not isinstance(combined_slicers, supported_slicers):
|
1367
|
+
raise ValueError(
|
1368
|
+
"Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
|
1369
|
+
)
|
1370
|
+
cursor = (
|
1371
|
+
combined_slicers
|
1372
|
+
if isinstance(
|
1373
|
+
combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
|
1374
|
+
)
|
1375
|
+
else self._create_component_from_model(model=model.incremental_sync, config=config)
|
1376
|
+
)
|
1377
|
+
|
1378
|
+
client_side_incremental_sync = {"cursor": cursor}
|
1379
|
+
|
1380
|
+
if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
|
1381
|
+
cursor_model = model.incremental_sync
|
1382
|
+
|
1383
|
+
end_time_option = (
|
1384
|
+
RequestOption(
|
1385
|
+
inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
|
1386
|
+
field_name=cursor_model.end_time_option.field_name,
|
1387
|
+
parameters=cursor_model.parameters or {},
|
1388
|
+
)
|
1389
|
+
if cursor_model.end_time_option
|
1390
|
+
else None
|
1391
|
+
)
|
1392
|
+
start_time_option = (
|
1393
|
+
RequestOption(
|
1394
|
+
inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
|
1395
|
+
field_name=cursor_model.start_time_option.field_name,
|
1396
|
+
parameters=cursor_model.parameters or {},
|
1397
|
+
)
|
1398
|
+
if cursor_model.start_time_option
|
1399
|
+
else None
|
1400
|
+
)
|
1401
|
+
|
1402
|
+
request_options_provider = DatetimeBasedRequestOptionsProvider(
|
1403
|
+
start_time_option=start_time_option,
|
1404
|
+
end_time_option=end_time_option,
|
1405
|
+
partition_field_start=cursor_model.partition_field_end,
|
1406
|
+
partition_field_end=cursor_model.partition_field_end,
|
1407
|
+
config=config,
|
1408
|
+
parameters=model.parameters or {},
|
1409
|
+
)
|
1410
|
+
else:
|
1411
|
+
request_options_provider = None
|
1412
|
+
|
574
1413
|
transformations = []
|
575
1414
|
if model.transformations:
|
576
1415
|
for transformation_model in model.transformations:
|
577
|
-
transformations.append(
|
1416
|
+
transformations.append(
|
1417
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1418
|
+
)
|
578
1419
|
retriever = self._create_component_from_model(
|
579
1420
|
model=model.retriever,
|
580
1421
|
config=config,
|
581
1422
|
name=model.name,
|
582
1423
|
primary_key=primary_key,
|
583
1424
|
stream_slicer=combined_slicers,
|
1425
|
+
request_options_provider=request_options_provider,
|
584
1426
|
stop_condition_on_cursor=stop_condition_on_cursor,
|
1427
|
+
client_side_incremental_sync=client_side_incremental_sync,
|
585
1428
|
transformations=transformations,
|
586
1429
|
)
|
587
1430
|
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
588
1431
|
|
1432
|
+
if model.state_migrations:
|
1433
|
+
state_transformations = [
|
1434
|
+
self._create_component_from_model(state_migration, config, declarative_stream=model)
|
1435
|
+
for state_migration in model.state_migrations
|
1436
|
+
]
|
1437
|
+
else:
|
1438
|
+
state_transformations = []
|
1439
|
+
|
589
1440
|
if model.schema_loader:
|
590
|
-
schema_loader = self._create_component_from_model(
|
1441
|
+
schema_loader = self._create_component_from_model(
|
1442
|
+
model=model.schema_loader, config=config
|
1443
|
+
)
|
591
1444
|
else:
|
592
1445
|
options = model.parameters or {}
|
593
1446
|
if "name" not in options:
|
@@ -600,58 +1453,113 @@ class ModelToComponentFactory:
|
|
600
1453
|
retriever=retriever,
|
601
1454
|
schema_loader=schema_loader,
|
602
1455
|
stream_cursor_field=cursor_field or "",
|
1456
|
+
state_migrations=state_transformations,
|
603
1457
|
config=config,
|
604
1458
|
parameters=model.parameters or {},
|
605
1459
|
)
|
606
1460
|
|
607
|
-
def
|
608
|
-
|
609
|
-
|
610
|
-
|
1461
|
+
def _build_stream_slicer_from_partition_router(
|
1462
|
+
self,
|
1463
|
+
model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
|
1464
|
+
config: Config,
|
1465
|
+
) -> Optional[PartitionRouter]:
|
1466
|
+
if (
|
1467
|
+
hasattr(model, "partition_router")
|
1468
|
+
and isinstance(model, SimpleRetrieverModel)
|
1469
|
+
and model.partition_router
|
1470
|
+
):
|
1471
|
+
stream_slicer_model = model.partition_router
|
1472
|
+
|
611
1473
|
if isinstance(stream_slicer_model, list):
|
612
|
-
|
613
|
-
[
|
1474
|
+
return CartesianProductStreamSlicer(
|
1475
|
+
[
|
1476
|
+
self._create_component_from_model(model=slicer, config=config)
|
1477
|
+
for slicer in stream_slicer_model
|
1478
|
+
],
|
1479
|
+
parameters={},
|
614
1480
|
)
|
615
1481
|
else:
|
616
|
-
|
1482
|
+
return self._create_component_from_model(model=stream_slicer_model, config=config) # type: ignore[no-any-return]
|
1483
|
+
# Will be created PartitionRouter as stream_slicer_model is model.partition_router
|
1484
|
+
return None
|
1485
|
+
|
1486
|
+
def _build_resumable_cursor_from_paginator(
|
1487
|
+
self,
|
1488
|
+
model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
|
1489
|
+
stream_slicer: Optional[StreamSlicer],
|
1490
|
+
) -> Optional[StreamSlicer]:
|
1491
|
+
if hasattr(model, "paginator") and model.paginator and not stream_slicer:
|
1492
|
+
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
1493
|
+
return ResumableFullRefreshCursor(parameters={})
|
1494
|
+
return None
|
1495
|
+
|
1496
|
+
def _merge_stream_slicers(
|
1497
|
+
self, model: DeclarativeStreamModel, config: Config
|
1498
|
+
) -> Optional[StreamSlicer]:
|
1499
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
617
1500
|
|
618
1501
|
if model.incremental_sync and stream_slicer:
|
619
1502
|
incremental_sync_model = model.incremental_sync
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
1503
|
+
if (
|
1504
|
+
hasattr(incremental_sync_model, "global_substream_cursor")
|
1505
|
+
and incremental_sync_model.global_substream_cursor
|
1506
|
+
):
|
1507
|
+
cursor_component = self._create_component_from_model(
|
1508
|
+
model=incremental_sync_model, config=config
|
1509
|
+
)
|
1510
|
+
return GlobalSubstreamCursor(
|
1511
|
+
stream_cursor=cursor_component, partition_router=stream_slicer
|
1512
|
+
)
|
1513
|
+
else:
|
1514
|
+
cursor_component = self._create_component_from_model(
|
1515
|
+
model=incremental_sync_model, config=config
|
1516
|
+
)
|
1517
|
+
return PerPartitionWithGlobalCursor(
|
1518
|
+
cursor_factory=CursorFactory(
|
1519
|
+
lambda: self._create_component_from_model(
|
1520
|
+
model=incremental_sync_model, config=config
|
1521
|
+
),
|
1522
|
+
),
|
1523
|
+
partition_router=stream_slicer,
|
1524
|
+
stream_cursor=cursor_component,
|
1525
|
+
)
|
626
1526
|
elif model.incremental_sync:
|
627
|
-
return
|
628
|
-
|
1527
|
+
return (
|
1528
|
+
self._create_component_from_model(model=model.incremental_sync, config=config)
|
1529
|
+
if model.incremental_sync
|
1530
|
+
else None
|
1531
|
+
)
|
1532
|
+
elif self._disable_resumable_full_refresh:
|
629
1533
|
return stream_slicer
|
630
|
-
|
631
|
-
|
1534
|
+
elif stream_slicer:
|
1535
|
+
# For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
|
1536
|
+
return PerPartitionCursor(
|
1537
|
+
cursor_factory=CursorFactory(
|
1538
|
+
create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
|
1539
|
+
),
|
1540
|
+
partition_router=stream_slicer,
|
1541
|
+
)
|
1542
|
+
return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
|
632
1543
|
|
633
|
-
def create_default_error_handler(
|
1544
|
+
def create_default_error_handler(
|
1545
|
+
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
1546
|
+
) -> DefaultErrorHandler:
|
634
1547
|
backoff_strategies = []
|
635
1548
|
if model.backoff_strategies:
|
636
1549
|
for backoff_strategy_model in model.backoff_strategies:
|
637
|
-
backoff_strategies.append(
|
638
|
-
|
639
|
-
|
1550
|
+
backoff_strategies.append(
|
1551
|
+
self._create_component_from_model(model=backoff_strategy_model, config=config)
|
1552
|
+
)
|
640
1553
|
|
641
1554
|
response_filters = []
|
642
1555
|
if model.response_filters:
|
643
1556
|
for response_filter_model in model.response_filters:
|
644
|
-
response_filters.append(
|
645
|
-
|
646
|
-
response_filters.append(
|
647
|
-
HttpResponseFilter(
|
648
|
-
ResponseAction.RETRY,
|
649
|
-
http_codes=HttpResponseFilter.DEFAULT_RETRIABLE_ERRORS,
|
650
|
-
config=config,
|
651
|
-
parameters=model.parameters or {},
|
1557
|
+
response_filters.append(
|
1558
|
+
self._create_component_from_model(model=response_filter_model, config=config)
|
652
1559
|
)
|
653
|
-
|
654
|
-
|
1560
|
+
response_filters.append(
|
1561
|
+
HttpResponseFilter(config=config, parameters=model.parameters or {})
|
1562
|
+
)
|
655
1563
|
|
656
1564
|
return DefaultErrorHandler(
|
657
1565
|
backoff_strategies=backoff_strategies,
|
@@ -662,23 +1570,41 @@ class ModelToComponentFactory:
|
|
662
1570
|
)
|
663
1571
|
|
664
1572
|
def create_default_paginator(
|
665
|
-
self,
|
1573
|
+
self,
|
1574
|
+
model: DefaultPaginatorModel,
|
1575
|
+
config: Config,
|
1576
|
+
*,
|
1577
|
+
url_base: str,
|
1578
|
+
decoder: Optional[Decoder] = None,
|
1579
|
+
cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
|
666
1580
|
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
|
667
|
-
|
1581
|
+
if decoder:
|
1582
|
+
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
1583
|
+
raise ValueError(
|
1584
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
1585
|
+
)
|
1586
|
+
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
1587
|
+
else:
|
1588
|
+
decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
|
668
1589
|
page_size_option = (
|
669
|
-
self._create_component_from_model(model=model.page_size_option, config=config)
|
1590
|
+
self._create_component_from_model(model=model.page_size_option, config=config)
|
1591
|
+
if model.page_size_option
|
1592
|
+
else None
|
670
1593
|
)
|
671
1594
|
page_token_option = (
|
672
|
-
self._create_component_from_model(model=model.page_token_option, config=config)
|
1595
|
+
self._create_component_from_model(model=model.page_token_option, config=config)
|
1596
|
+
if model.page_token_option
|
1597
|
+
else None
|
1598
|
+
)
|
1599
|
+
pagination_strategy = self._create_component_from_model(
|
1600
|
+
model=model.pagination_strategy, config=config, decoder=decoder_to_use
|
673
1601
|
)
|
674
|
-
pagination_strategy = self._create_component_from_model(model=model.pagination_strategy, config=config)
|
675
1602
|
if cursor_used_for_stop_condition:
|
676
1603
|
pagination_strategy = StopConditionPaginationStrategyDecorator(
|
677
1604
|
pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
|
678
1605
|
)
|
679
|
-
|
680
1606
|
paginator = DefaultPaginator(
|
681
|
-
decoder=
|
1607
|
+
decoder=decoder_to_use,
|
682
1608
|
page_size_option=page_size_option,
|
683
1609
|
page_token_option=page_token_option,
|
684
1610
|
pagination_strategy=pagination_strategy,
|
@@ -690,25 +1616,68 @@ class ModelToComponentFactory:
|
|
690
1616
|
return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
|
691
1617
|
return paginator
|
692
1618
|
|
693
|
-
def create_dpath_extractor(
|
694
|
-
|
1619
|
+
def create_dpath_extractor(
|
1620
|
+
self,
|
1621
|
+
model: DpathExtractorModel,
|
1622
|
+
config: Config,
|
1623
|
+
decoder: Optional[Decoder] = None,
|
1624
|
+
**kwargs: Any,
|
1625
|
+
) -> DpathExtractor:
|
1626
|
+
if decoder:
|
1627
|
+
decoder_to_use = decoder
|
1628
|
+
else:
|
1629
|
+
decoder_to_use = JsonDecoder(parameters={})
|
695
1630
|
model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
|
696
|
-
return DpathExtractor(
|
1631
|
+
return DpathExtractor(
|
1632
|
+
decoder=decoder_to_use,
|
1633
|
+
field_path=model_field_path,
|
1634
|
+
config=config,
|
1635
|
+
parameters=model.parameters or {},
|
1636
|
+
)
|
1637
|
+
|
1638
|
+
def create_response_to_file_extractor(
|
1639
|
+
self,
|
1640
|
+
model: ResponseToFileExtractorModel,
|
1641
|
+
**kwargs: Any,
|
1642
|
+
) -> ResponseToFileExtractor:
|
1643
|
+
return ResponseToFileExtractor(parameters=model.parameters or {})
|
697
1644
|
|
698
1645
|
@staticmethod
|
699
|
-
def create_exponential_backoff_strategy(
|
700
|
-
|
1646
|
+
def create_exponential_backoff_strategy(
|
1647
|
+
model: ExponentialBackoffStrategyModel, config: Config
|
1648
|
+
) -> ExponentialBackoffStrategy:
|
1649
|
+
return ExponentialBackoffStrategy(
|
1650
|
+
factor=model.factor or 5, parameters=model.parameters or {}, config=config
|
1651
|
+
)
|
701
1652
|
|
702
|
-
def create_http_requester(
|
1653
|
+
def create_http_requester(
|
1654
|
+
self,
|
1655
|
+
model: HttpRequesterModel,
|
1656
|
+
config: Config,
|
1657
|
+
decoder: Decoder = JsonDecoder(parameters={}),
|
1658
|
+
*,
|
1659
|
+
name: str,
|
1660
|
+
) -> HttpRequester:
|
703
1661
|
authenticator = (
|
704
|
-
self._create_component_from_model(
|
1662
|
+
self._create_component_from_model(
|
1663
|
+
model=model.authenticator,
|
1664
|
+
config=config,
|
1665
|
+
url_base=model.url_base,
|
1666
|
+
name=name,
|
1667
|
+
decoder=decoder,
|
1668
|
+
)
|
705
1669
|
if model.authenticator
|
706
1670
|
else None
|
707
1671
|
)
|
708
1672
|
error_handler = (
|
709
1673
|
self._create_component_from_model(model=model.error_handler, config=config)
|
710
1674
|
if model.error_handler
|
711
|
-
else DefaultErrorHandler(
|
1675
|
+
else DefaultErrorHandler(
|
1676
|
+
backoff_strategies=[],
|
1677
|
+
response_filters=[],
|
1678
|
+
config=config,
|
1679
|
+
parameters=model.parameters or {},
|
1680
|
+
)
|
712
1681
|
)
|
713
1682
|
|
714
1683
|
request_options_provider = InterpolatedRequestOptionsProvider(
|
@@ -723,7 +1692,7 @@ class ModelToComponentFactory:
|
|
723
1692
|
assert model.use_cache is not None # for mypy
|
724
1693
|
assert model.http_method is not None # for mypy
|
725
1694
|
|
726
|
-
|
1695
|
+
use_cache = model.use_cache and not self._disable_cache
|
727
1696
|
|
728
1697
|
return HttpRequester(
|
729
1698
|
name=name,
|
@@ -737,18 +1706,29 @@ class ModelToComponentFactory:
|
|
737
1706
|
disable_retries=self._disable_retries,
|
738
1707
|
parameters=model.parameters or {},
|
739
1708
|
message_repository=self._message_repository,
|
740
|
-
use_cache=
|
1709
|
+
use_cache=use_cache,
|
1710
|
+
decoder=decoder,
|
1711
|
+
stream_response=decoder.is_stream_response() if decoder else False,
|
741
1712
|
)
|
742
1713
|
|
743
1714
|
@staticmethod
|
744
|
-
def create_http_response_filter(
|
745
|
-
|
1715
|
+
def create_http_response_filter(
|
1716
|
+
model: HttpResponseFilterModel, config: Config, **kwargs: Any
|
1717
|
+
) -> HttpResponseFilter:
|
1718
|
+
if model.action:
|
1719
|
+
action = ResponseAction(model.action.value)
|
1720
|
+
else:
|
1721
|
+
action = None
|
1722
|
+
|
1723
|
+
failure_type = FailureType(model.failure_type.value) if model.failure_type else None
|
1724
|
+
|
746
1725
|
http_codes = (
|
747
1726
|
set(model.http_codes) if model.http_codes else set()
|
748
1727
|
) # JSON schema notation has no set data type. The schema enforces an array of unique elements
|
749
1728
|
|
750
1729
|
return HttpResponseFilter(
|
751
1730
|
action=action,
|
1731
|
+
failure_type=failure_type,
|
752
1732
|
error_message=model.error_message or "",
|
753
1733
|
error_message_contains=model.error_message_contains or "",
|
754
1734
|
http_codes=http_codes,
|
@@ -758,19 +1738,160 @@ class ModelToComponentFactory:
|
|
758
1738
|
)
|
759
1739
|
|
760
1740
|
@staticmethod
|
761
|
-
def create_inline_schema_loader(
|
1741
|
+
def create_inline_schema_loader(
|
1742
|
+
model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
|
1743
|
+
) -> InlineSchemaLoader:
|
762
1744
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
763
1745
|
|
1746
|
+
@staticmethod
|
1747
|
+
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
|
1748
|
+
return TypesMap(target_type=model.target_type, current_type=model.current_type)
|
1749
|
+
|
1750
|
+
def create_schema_type_identifier(
|
1751
|
+
self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
|
1752
|
+
) -> SchemaTypeIdentifier:
|
1753
|
+
types_mapping = []
|
1754
|
+
if model.types_mapping:
|
1755
|
+
types_mapping.extend(
|
1756
|
+
[
|
1757
|
+
self._create_component_from_model(types_map, config=config)
|
1758
|
+
for types_map in model.types_mapping
|
1759
|
+
]
|
1760
|
+
)
|
1761
|
+
model_schema_pointer: List[Union[InterpolatedString, str]] = (
|
1762
|
+
[x for x in model.schema_pointer] if model.schema_pointer else []
|
1763
|
+
)
|
1764
|
+
model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
|
1765
|
+
model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
|
1766
|
+
[x for x in model.type_pointer] if model.type_pointer else None
|
1767
|
+
)
|
1768
|
+
|
1769
|
+
return SchemaTypeIdentifier(
|
1770
|
+
schema_pointer=model_schema_pointer,
|
1771
|
+
key_pointer=model_key_pointer,
|
1772
|
+
type_pointer=model_type_pointer,
|
1773
|
+
types_mapping=types_mapping,
|
1774
|
+
parameters=model.parameters or {},
|
1775
|
+
)
|
1776
|
+
|
1777
|
+
def create_dynamic_schema_loader(
|
1778
|
+
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
1779
|
+
) -> DynamicSchemaLoader:
|
1780
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1781
|
+
combined_slicers = self._build_resumable_cursor_from_paginator(
|
1782
|
+
model.retriever, stream_slicer
|
1783
|
+
)
|
1784
|
+
|
1785
|
+
schema_transformations = []
|
1786
|
+
if model.schema_transformations:
|
1787
|
+
for transformation_model in model.schema_transformations:
|
1788
|
+
schema_transformations.append(
|
1789
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1790
|
+
)
|
1791
|
+
|
1792
|
+
retriever = self._create_component_from_model(
|
1793
|
+
model=model.retriever,
|
1794
|
+
config=config,
|
1795
|
+
name="",
|
1796
|
+
primary_key=None,
|
1797
|
+
stream_slicer=combined_slicers,
|
1798
|
+
transformations=[],
|
1799
|
+
)
|
1800
|
+
schema_type_identifier = self._create_component_from_model(
|
1801
|
+
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
1802
|
+
)
|
1803
|
+
return DynamicSchemaLoader(
|
1804
|
+
retriever=retriever,
|
1805
|
+
config=config,
|
1806
|
+
schema_transformations=schema_transformations,
|
1807
|
+
schema_type_identifier=schema_type_identifier,
|
1808
|
+
parameters=model.parameters or {},
|
1809
|
+
)
|
1810
|
+
|
764
1811
|
@staticmethod
|
765
1812
|
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
|
766
1813
|
return JsonDecoder(parameters={})
|
767
1814
|
|
768
1815
|
@staticmethod
|
769
|
-
def
|
770
|
-
|
1816
|
+
def create_jsonl_decoder(
|
1817
|
+
model: JsonlDecoderModel, config: Config, **kwargs: Any
|
1818
|
+
) -> JsonlDecoder:
|
1819
|
+
return JsonlDecoder(parameters={})
|
1820
|
+
|
1821
|
+
@staticmethod
|
1822
|
+
def create_json_line_parser(
|
1823
|
+
model: JsonLineParserModel, config: Config, **kwargs: Any
|
1824
|
+
) -> JsonLineParser:
|
1825
|
+
return JsonLineParser(encoding=model.encoding)
|
1826
|
+
|
1827
|
+
@staticmethod
|
1828
|
+
def create_iterable_decoder(
|
1829
|
+
model: IterableDecoderModel, config: Config, **kwargs: Any
|
1830
|
+
) -> IterableDecoder:
|
1831
|
+
return IterableDecoder(parameters={})
|
1832
|
+
|
1833
|
+
@staticmethod
|
1834
|
+
def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
|
1835
|
+
return XmlDecoder(parameters={})
|
1836
|
+
|
1837
|
+
@staticmethod
|
1838
|
+
def create_gzipjson_decoder(
|
1839
|
+
model: GzipJsonDecoderModel, config: Config, **kwargs: Any
|
1840
|
+
) -> GzipJsonDecoder:
|
1841
|
+
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
1842
|
+
|
1843
|
+
def create_gzip_parser(
|
1844
|
+
self, model: GzipParserModel, config: Config, **kwargs: Any
|
1845
|
+
) -> GzipParser:
|
1846
|
+
inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
|
1847
|
+
return GzipParser(inner_parser=inner_parser)
|
1848
|
+
|
1849
|
+
@staticmethod
|
1850
|
+
def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
|
1851
|
+
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
1852
|
+
|
1853
|
+
def create_composite_raw_decoder(
|
1854
|
+
self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
|
1855
|
+
) -> CompositeRawDecoder:
|
1856
|
+
parser = self._create_component_from_model(model=model.parser, config=config)
|
1857
|
+
return CompositeRawDecoder(parser=parser)
|
1858
|
+
|
1859
|
+
@staticmethod
|
1860
|
+
def create_json_file_schema_loader(
|
1861
|
+
model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
|
1862
|
+
) -> JsonFileSchemaLoader:
|
1863
|
+
return JsonFileSchemaLoader(
|
1864
|
+
file_path=model.file_path or "", config=config, parameters=model.parameters or {}
|
1865
|
+
)
|
1866
|
+
|
1867
|
+
@staticmethod
|
1868
|
+
def create_jwt_authenticator(
|
1869
|
+
model: JwtAuthenticatorModel, config: Config, **kwargs: Any
|
1870
|
+
) -> JwtAuthenticator:
|
1871
|
+
jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
|
1872
|
+
jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
|
1873
|
+
return JwtAuthenticator(
|
1874
|
+
config=config,
|
1875
|
+
parameters=model.parameters or {},
|
1876
|
+
algorithm=JwtAlgorithm(model.algorithm.value),
|
1877
|
+
secret_key=model.secret_key,
|
1878
|
+
base64_encode_secret_key=model.base64_encode_secret_key,
|
1879
|
+
token_duration=model.token_duration,
|
1880
|
+
header_prefix=model.header_prefix,
|
1881
|
+
kid=jwt_headers.kid,
|
1882
|
+
typ=jwt_headers.typ,
|
1883
|
+
cty=jwt_headers.cty,
|
1884
|
+
iss=jwt_payload.iss,
|
1885
|
+
sub=jwt_payload.sub,
|
1886
|
+
aud=jwt_payload.aud,
|
1887
|
+
additional_jwt_headers=model.additional_jwt_headers,
|
1888
|
+
additional_jwt_payload=model.additional_jwt_payload,
|
1889
|
+
)
|
771
1890
|
|
772
1891
|
@staticmethod
|
773
|
-
def create_list_partition_router(
|
1892
|
+
def create_list_partition_router(
|
1893
|
+
model: ListPartitionRouterModel, config: Config, **kwargs: Any
|
1894
|
+
) -> ListPartitionRouter:
|
774
1895
|
request_option = (
|
775
1896
|
RequestOption(
|
776
1897
|
inject_into=RequestOptionType(model.request_option.inject_into.value),
|
@@ -789,7 +1910,9 @@ class ModelToComponentFactory:
|
|
789
1910
|
)
|
790
1911
|
|
791
1912
|
@staticmethod
|
792
|
-
def create_min_max_datetime(
|
1913
|
+
def create_min_max_datetime(
|
1914
|
+
model: MinMaxDatetimeModel, config: Config, **kwargs: Any
|
1915
|
+
) -> MinMaxDatetime:
|
793
1916
|
return MinMaxDatetime(
|
794
1917
|
datetime=model.datetime,
|
795
1918
|
datetime_format=model.datetime_format or "",
|
@@ -803,29 +1926,44 @@ class ModelToComponentFactory:
|
|
803
1926
|
return NoAuth(parameters=model.parameters or {})
|
804
1927
|
|
805
1928
|
@staticmethod
|
806
|
-
def create_no_pagination(
|
1929
|
+
def create_no_pagination(
|
1930
|
+
model: NoPaginationModel, config: Config, **kwargs: Any
|
1931
|
+
) -> NoPagination:
|
807
1932
|
return NoPagination(parameters={})
|
808
1933
|
|
809
|
-
def create_oauth_authenticator(
|
1934
|
+
def create_oauth_authenticator(
|
1935
|
+
self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
|
1936
|
+
) -> DeclarativeOauth2Authenticator:
|
810
1937
|
if model.refresh_token_updater:
|
811
1938
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
812
1939
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
|
813
1940
|
config,
|
814
|
-
InterpolatedString.create(
|
1941
|
+
InterpolatedString.create(
|
1942
|
+
model.token_refresh_endpoint, # type: ignore
|
1943
|
+
parameters=model.parameters or {},
|
1944
|
+
).eval(config),
|
815
1945
|
access_token_name=InterpolatedString.create(
|
816
1946
|
model.access_token_name or "access_token", parameters=model.parameters or {}
|
817
1947
|
).eval(config),
|
818
1948
|
refresh_token_name=model.refresh_token_updater.refresh_token_name,
|
819
|
-
expires_in_name=InterpolatedString.create(
|
820
|
-
|
821
|
-
),
|
822
|
-
client_id=InterpolatedString.create(
|
823
|
-
|
1949
|
+
expires_in_name=InterpolatedString.create(
|
1950
|
+
model.expires_in_name or "expires_in", parameters=model.parameters or {}
|
1951
|
+
).eval(config),
|
1952
|
+
client_id=InterpolatedString.create(
|
1953
|
+
model.client_id, parameters=model.parameters or {}
|
1954
|
+
).eval(config),
|
1955
|
+
client_secret=InterpolatedString.create(
|
1956
|
+
model.client_secret, parameters=model.parameters or {}
|
1957
|
+
).eval(config),
|
824
1958
|
access_token_config_path=model.refresh_token_updater.access_token_config_path,
|
825
1959
|
refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
|
826
1960
|
token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
|
827
|
-
grant_type=InterpolatedString.create(
|
828
|
-
|
1961
|
+
grant_type=InterpolatedString.create(
|
1962
|
+
model.grant_type or "refresh_token", parameters=model.parameters or {}
|
1963
|
+
).eval(config),
|
1964
|
+
refresh_request_body=InterpolatedMapping(
|
1965
|
+
model.refresh_request_body or {}, parameters=model.parameters or {}
|
1966
|
+
).eval(config),
|
829
1967
|
scopes=model.scopes,
|
830
1968
|
token_expiry_date_format=model.token_expiry_date_format,
|
831
1969
|
message_repository=self._message_repository,
|
@@ -836,6 +1974,7 @@ class ModelToComponentFactory:
|
|
836
1974
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
837
1975
|
return DeclarativeOauth2Authenticator( # type: ignore
|
838
1976
|
access_token_name=model.access_token_name or "access_token",
|
1977
|
+
access_token_value=model.access_token_value,
|
839
1978
|
client_id=model.client_id,
|
840
1979
|
client_secret=model.client_secret,
|
841
1980
|
expires_in_name=model.expires_in_name or "expires_in",
|
@@ -844,7 +1983,7 @@ class ModelToComponentFactory:
|
|
844
1983
|
refresh_token=model.refresh_token,
|
845
1984
|
scopes=model.scopes,
|
846
1985
|
token_expiry_date=model.token_expiry_date,
|
847
|
-
token_expiry_date_format=model.token_expiry_date_format,
|
1986
|
+
token_expiry_date_format=model.token_expiry_date_format,
|
848
1987
|
token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
|
849
1988
|
token_refresh_endpoint=model.token_refresh_endpoint,
|
850
1989
|
config=config,
|
@@ -853,16 +1992,33 @@ class ModelToComponentFactory:
|
|
853
1992
|
)
|
854
1993
|
|
855
1994
|
@staticmethod
|
856
|
-
def create_offset_increment(
|
1995
|
+
def create_offset_increment(
|
1996
|
+
model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
|
1997
|
+
) -> OffsetIncrement:
|
1998
|
+
if isinstance(decoder, PaginationDecoderDecorator):
|
1999
|
+
if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
|
2000
|
+
raise ValueError(
|
2001
|
+
f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
2002
|
+
)
|
2003
|
+
decoder_to_use = decoder
|
2004
|
+
else:
|
2005
|
+
if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
|
2006
|
+
raise ValueError(
|
2007
|
+
f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
|
2008
|
+
)
|
2009
|
+
decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
|
857
2010
|
return OffsetIncrement(
|
858
2011
|
page_size=model.page_size,
|
859
2012
|
config=config,
|
2013
|
+
decoder=decoder_to_use,
|
860
2014
|
inject_on_first_request=model.inject_on_first_request or False,
|
861
2015
|
parameters=model.parameters or {},
|
862
2016
|
)
|
863
2017
|
|
864
2018
|
@staticmethod
|
865
|
-
def create_page_increment(
|
2019
|
+
def create_page_increment(
|
2020
|
+
model: PageIncrementModel, config: Config, **kwargs: Any
|
2021
|
+
) -> PageIncrement:
|
866
2022
|
return PageIncrement(
|
867
2023
|
page_size=model.page_size,
|
868
2024
|
config=config,
|
@@ -871,28 +2027,42 @@ class ModelToComponentFactory:
|
|
871
2027
|
parameters=model.parameters or {},
|
872
2028
|
)
|
873
2029
|
|
874
|
-
def create_parent_stream_config(
|
2030
|
+
def create_parent_stream_config(
|
2031
|
+
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
2032
|
+
) -> ParentStreamConfig:
|
875
2033
|
declarative_stream = self._create_component_from_model(model.stream, config=config)
|
876
|
-
request_option =
|
2034
|
+
request_option = (
|
2035
|
+
self._create_component_from_model(model.request_option, config=config)
|
2036
|
+
if model.request_option
|
2037
|
+
else None
|
2038
|
+
)
|
877
2039
|
return ParentStreamConfig(
|
878
2040
|
parent_key=model.parent_key,
|
879
2041
|
request_option=request_option,
|
880
2042
|
stream=declarative_stream,
|
881
2043
|
partition_field=model.partition_field,
|
882
2044
|
config=config,
|
2045
|
+
incremental_dependency=model.incremental_dependency or False,
|
883
2046
|
parameters=model.parameters or {},
|
2047
|
+
extra_fields=model.extra_fields,
|
884
2048
|
)
|
885
2049
|
|
886
2050
|
@staticmethod
|
887
|
-
def create_record_filter(
|
888
|
-
|
2051
|
+
def create_record_filter(
|
2052
|
+
model: RecordFilterModel, config: Config, **kwargs: Any
|
2053
|
+
) -> RecordFilter:
|
2054
|
+
return RecordFilter(
|
2055
|
+
condition=model.condition or "", config=config, parameters=model.parameters or {}
|
2056
|
+
)
|
889
2057
|
|
890
2058
|
@staticmethod
|
891
2059
|
def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
|
892
2060
|
return RequestPath(parameters={})
|
893
2061
|
|
894
2062
|
@staticmethod
|
895
|
-
def create_request_option(
|
2063
|
+
def create_request_option(
|
2064
|
+
model: RequestOptionModel, config: Config, **kwargs: Any
|
2065
|
+
) -> RequestOption:
|
896
2066
|
inject_into = RequestOptionType(model.inject_into.value)
|
897
2067
|
return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
|
898
2068
|
|
@@ -901,29 +2071,60 @@ class ModelToComponentFactory:
|
|
901
2071
|
model: RecordSelectorModel,
|
902
2072
|
config: Config,
|
903
2073
|
*,
|
904
|
-
|
2074
|
+
name: str,
|
2075
|
+
transformations: List[RecordTransformation] | None = None,
|
2076
|
+
decoder: Decoder | None = None,
|
2077
|
+
client_side_incremental_sync: Dict[str, Any] | None = None,
|
905
2078
|
**kwargs: Any,
|
906
2079
|
) -> RecordSelector:
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
2080
|
+
extractor = self._create_component_from_model(
|
2081
|
+
model=model.extractor, decoder=decoder, config=config
|
2082
|
+
)
|
2083
|
+
record_filter = (
|
2084
|
+
self._create_component_from_model(model.record_filter, config=config)
|
2085
|
+
if model.record_filter
|
2086
|
+
else None
|
2087
|
+
)
|
2088
|
+
if client_side_incremental_sync:
|
2089
|
+
record_filter = ClientSideIncrementalRecordFilterDecorator(
|
2090
|
+
config=config,
|
2091
|
+
parameters=model.parameters,
|
2092
|
+
condition=model.record_filter.condition
|
2093
|
+
if (model.record_filter and hasattr(model.record_filter, "condition"))
|
2094
|
+
else None,
|
2095
|
+
**client_side_incremental_sync,
|
2096
|
+
)
|
2097
|
+
schema_normalization = (
|
2098
|
+
TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
|
2099
|
+
if isinstance(model.schema_normalization, SchemaNormalizationModel)
|
2100
|
+
else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here
|
2101
|
+
)
|
911
2102
|
|
912
2103
|
return RecordSelector(
|
913
2104
|
extractor=extractor,
|
2105
|
+
name=name,
|
914
2106
|
config=config,
|
915
2107
|
record_filter=record_filter,
|
916
|
-
transformations=transformations,
|
2108
|
+
transformations=transformations or [],
|
917
2109
|
schema_normalization=schema_normalization,
|
918
2110
|
parameters=model.parameters or {},
|
919
2111
|
)
|
920
2112
|
|
921
2113
|
@staticmethod
|
922
|
-
def create_remove_fields(
|
923
|
-
|
2114
|
+
def create_remove_fields(
|
2115
|
+
model: RemoveFieldsModel, config: Config, **kwargs: Any
|
2116
|
+
) -> RemoveFields:
|
2117
|
+
return RemoveFields(
|
2118
|
+
field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
|
2119
|
+
)
|
924
2120
|
|
925
|
-
def create_selective_authenticator(
|
926
|
-
|
2121
|
+
def create_selective_authenticator(
|
2122
|
+
self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
|
2123
|
+
) -> DeclarativeAuthenticator:
|
2124
|
+
authenticators = {
|
2125
|
+
name: self._create_component_from_model(model=auth, config=config)
|
2126
|
+
for name, auth in model.authenticators.items()
|
2127
|
+
}
|
927
2128
|
# SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error
|
928
2129
|
return SelectiveAuthenticator( # type: ignore[abstract]
|
929
2130
|
config=config,
|
@@ -957,14 +2158,49 @@ class ModelToComponentFactory:
|
|
957
2158
|
name: str,
|
958
2159
|
primary_key: Optional[Union[str, List[str], List[List[str]]]],
|
959
2160
|
stream_slicer: Optional[StreamSlicer],
|
2161
|
+
request_options_provider: Optional[RequestOptionsProvider] = None,
|
960
2162
|
stop_condition_on_cursor: bool = False,
|
2163
|
+
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
961
2164
|
transformations: List[RecordTransformation],
|
962
2165
|
) -> SimpleRetriever:
|
963
|
-
|
964
|
-
|
965
|
-
|
2166
|
+
decoder = (
|
2167
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
2168
|
+
if model.decoder
|
2169
|
+
else JsonDecoder(parameters={})
|
2170
|
+
)
|
2171
|
+
requester = self._create_component_from_model(
|
2172
|
+
model=model.requester, decoder=decoder, config=config, name=name
|
2173
|
+
)
|
2174
|
+
record_selector = self._create_component_from_model(
|
2175
|
+
model=model.record_selector,
|
2176
|
+
name=name,
|
2177
|
+
config=config,
|
2178
|
+
decoder=decoder,
|
2179
|
+
transformations=transformations,
|
2180
|
+
client_side_incremental_sync=client_side_incremental_sync,
|
2181
|
+
)
|
2182
|
+
url_base = (
|
2183
|
+
model.requester.url_base
|
2184
|
+
if hasattr(model.requester, "url_base")
|
2185
|
+
else requester.get_url_base()
|
2186
|
+
)
|
2187
|
+
|
2188
|
+
# Define cursor only if per partition or common incremental support is needed
|
2189
|
+
cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
|
2190
|
+
|
2191
|
+
if (
|
2192
|
+
not isinstance(stream_slicer, DatetimeBasedCursor)
|
2193
|
+
or type(stream_slicer) is not DatetimeBasedCursor
|
2194
|
+
) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
|
2195
|
+
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
|
2196
|
+
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
|
2197
|
+
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
|
2198
|
+
# request_options_provider
|
2199
|
+
request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
|
2200
|
+
elif not request_options_provider:
|
2201
|
+
request_options_provider = DefaultRequestOptionsProvider(parameters={})
|
2202
|
+
|
966
2203
|
stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
|
967
|
-
cursor = stream_slicer if isinstance(stream_slicer, Cursor) else None
|
968
2204
|
|
969
2205
|
cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
|
970
2206
|
paginator = (
|
@@ -972,13 +2208,16 @@ class ModelToComponentFactory:
|
|
972
2208
|
model=model.paginator,
|
973
2209
|
config=config,
|
974
2210
|
url_base=url_base,
|
2211
|
+
decoder=decoder,
|
975
2212
|
cursor_used_for_stop_condition=cursor_used_for_stop_condition,
|
976
2213
|
)
|
977
2214
|
if model.paginator
|
978
2215
|
else NoPagination(parameters={})
|
979
2216
|
)
|
980
2217
|
|
981
|
-
ignore_stream_slicer_parameters_on_paginated_requests =
|
2218
|
+
ignore_stream_slicer_parameters_on_paginated_requests = (
|
2219
|
+
model.ignore_stream_slicer_parameters_on_paginated_requests or False
|
2220
|
+
)
|
982
2221
|
|
983
2222
|
if self._limit_slices_fetched or self._emit_connector_builder_messages:
|
984
2223
|
return SimpleRetrieverTestReadDecorator(
|
@@ -988,6 +2227,7 @@ class ModelToComponentFactory:
|
|
988
2227
|
requester=requester,
|
989
2228
|
record_selector=record_selector,
|
990
2229
|
stream_slicer=stream_slicer,
|
2230
|
+
request_option_provider=request_options_provider,
|
991
2231
|
cursor=cursor,
|
992
2232
|
config=config,
|
993
2233
|
maximum_number_of_slices=self._limit_slices_fetched or 5,
|
@@ -1001,12 +2241,192 @@ class ModelToComponentFactory:
|
|
1001
2241
|
requester=requester,
|
1002
2242
|
record_selector=record_selector,
|
1003
2243
|
stream_slicer=stream_slicer,
|
2244
|
+
request_option_provider=request_options_provider,
|
1004
2245
|
cursor=cursor,
|
1005
2246
|
config=config,
|
1006
2247
|
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
|
1007
2248
|
parameters=model.parameters or {},
|
1008
2249
|
)
|
1009
2250
|
|
2251
|
+
def _create_async_job_status_mapping(
|
2252
|
+
self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
|
2253
|
+
) -> Mapping[str, AsyncJobStatus]:
|
2254
|
+
api_status_to_cdk_status = {}
|
2255
|
+
for cdk_status, api_statuses in model.dict().items():
|
2256
|
+
if cdk_status == "type":
|
2257
|
+
# This is an element of the dict because of the typing of the CDK but it is not a CDK status
|
2258
|
+
continue
|
2259
|
+
|
2260
|
+
for status in api_statuses:
|
2261
|
+
if status in api_status_to_cdk_status:
|
2262
|
+
raise ValueError(
|
2263
|
+
f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once"
|
2264
|
+
)
|
2265
|
+
api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
|
2266
|
+
return api_status_to_cdk_status
|
2267
|
+
|
2268
|
+
def _get_async_job_status(self, status: str) -> AsyncJobStatus:
|
2269
|
+
match status:
|
2270
|
+
case "running":
|
2271
|
+
return AsyncJobStatus.RUNNING
|
2272
|
+
case "completed":
|
2273
|
+
return AsyncJobStatus.COMPLETED
|
2274
|
+
case "failed":
|
2275
|
+
return AsyncJobStatus.FAILED
|
2276
|
+
case "timeout":
|
2277
|
+
return AsyncJobStatus.TIMED_OUT
|
2278
|
+
case _:
|
2279
|
+
raise ValueError(f"Unsupported CDK status {status}")
|
2280
|
+
|
2281
|
+
def create_async_retriever(
|
2282
|
+
self,
|
2283
|
+
model: AsyncRetrieverModel,
|
2284
|
+
config: Config,
|
2285
|
+
*,
|
2286
|
+
name: str,
|
2287
|
+
primary_key: Optional[
|
2288
|
+
Union[str, List[str], List[List[str]]]
|
2289
|
+
], # this seems to be needed to match create_simple_retriever
|
2290
|
+
stream_slicer: Optional[StreamSlicer],
|
2291
|
+
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
2292
|
+
transformations: List[RecordTransformation],
|
2293
|
+
**kwargs: Any,
|
2294
|
+
) -> AsyncRetriever:
|
2295
|
+
decoder = (
|
2296
|
+
self._create_component_from_model(model=model.decoder, config=config)
|
2297
|
+
if model.decoder
|
2298
|
+
else JsonDecoder(parameters={})
|
2299
|
+
)
|
2300
|
+
record_selector = self._create_component_from_model(
|
2301
|
+
model=model.record_selector,
|
2302
|
+
config=config,
|
2303
|
+
decoder=decoder,
|
2304
|
+
name=name,
|
2305
|
+
transformations=transformations,
|
2306
|
+
client_side_incremental_sync=client_side_incremental_sync,
|
2307
|
+
)
|
2308
|
+
stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
|
2309
|
+
creation_requester = self._create_component_from_model(
|
2310
|
+
model=model.creation_requester,
|
2311
|
+
decoder=decoder,
|
2312
|
+
config=config,
|
2313
|
+
name=f"job creation - {name}",
|
2314
|
+
)
|
2315
|
+
polling_requester = self._create_component_from_model(
|
2316
|
+
model=model.polling_requester,
|
2317
|
+
decoder=decoder,
|
2318
|
+
config=config,
|
2319
|
+
name=f"job polling - {name}",
|
2320
|
+
)
|
2321
|
+
job_download_components_name = f"job download - {name}"
|
2322
|
+
download_decoder = (
|
2323
|
+
self._create_component_from_model(model=model.download_decoder, config=config)
|
2324
|
+
if model.download_decoder
|
2325
|
+
else JsonDecoder(parameters={})
|
2326
|
+
)
|
2327
|
+
download_extractor = (
|
2328
|
+
self._create_component_from_model(
|
2329
|
+
model=model.download_extractor,
|
2330
|
+
config=config,
|
2331
|
+
decoder=download_decoder,
|
2332
|
+
parameters=model.parameters,
|
2333
|
+
)
|
2334
|
+
if model.download_extractor
|
2335
|
+
else DpathExtractor(
|
2336
|
+
[],
|
2337
|
+
config=config,
|
2338
|
+
decoder=download_decoder,
|
2339
|
+
parameters=model.parameters or {},
|
2340
|
+
)
|
2341
|
+
)
|
2342
|
+
download_requester = self._create_component_from_model(
|
2343
|
+
model=model.download_requester,
|
2344
|
+
decoder=download_decoder,
|
2345
|
+
config=config,
|
2346
|
+
name=job_download_components_name,
|
2347
|
+
)
|
2348
|
+
download_retriever = SimpleRetriever(
|
2349
|
+
requester=download_requester,
|
2350
|
+
record_selector=RecordSelector(
|
2351
|
+
extractor=download_extractor,
|
2352
|
+
name=name,
|
2353
|
+
record_filter=None,
|
2354
|
+
transformations=[],
|
2355
|
+
schema_normalization=TypeTransformer(TransformConfig.NoTransform),
|
2356
|
+
config=config,
|
2357
|
+
parameters={},
|
2358
|
+
),
|
2359
|
+
primary_key=None,
|
2360
|
+
name=job_download_components_name,
|
2361
|
+
paginator=(
|
2362
|
+
self._create_component_from_model(
|
2363
|
+
model=model.download_paginator, decoder=decoder, config=config, url_base=""
|
2364
|
+
)
|
2365
|
+
if model.download_paginator
|
2366
|
+
else NoPagination(parameters={})
|
2367
|
+
),
|
2368
|
+
config=config,
|
2369
|
+
parameters={},
|
2370
|
+
)
|
2371
|
+
abort_requester = (
|
2372
|
+
self._create_component_from_model(
|
2373
|
+
model=model.abort_requester,
|
2374
|
+
decoder=decoder,
|
2375
|
+
config=config,
|
2376
|
+
name=f"job abort - {name}",
|
2377
|
+
)
|
2378
|
+
if model.abort_requester
|
2379
|
+
else None
|
2380
|
+
)
|
2381
|
+
delete_requester = (
|
2382
|
+
self._create_component_from_model(
|
2383
|
+
model=model.delete_requester,
|
2384
|
+
decoder=decoder,
|
2385
|
+
config=config,
|
2386
|
+
name=f"job delete - {name}",
|
2387
|
+
)
|
2388
|
+
if model.delete_requester
|
2389
|
+
else None
|
2390
|
+
)
|
2391
|
+
status_extractor = self._create_component_from_model(
|
2392
|
+
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2393
|
+
)
|
2394
|
+
urls_extractor = self._create_component_from_model(
|
2395
|
+
model=model.urls_extractor, decoder=decoder, config=config, name=name
|
2396
|
+
)
|
2397
|
+
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
2398
|
+
creation_requester=creation_requester,
|
2399
|
+
polling_requester=polling_requester,
|
2400
|
+
download_retriever=download_retriever,
|
2401
|
+
abort_requester=abort_requester,
|
2402
|
+
delete_requester=delete_requester,
|
2403
|
+
status_extractor=status_extractor,
|
2404
|
+
status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
|
2405
|
+
urls_extractor=urls_extractor,
|
2406
|
+
)
|
2407
|
+
|
2408
|
+
async_job_partition_router = AsyncJobPartitionRouter(
|
2409
|
+
job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
|
2410
|
+
job_repository,
|
2411
|
+
stream_slices,
|
2412
|
+
JobTracker(1),
|
2413
|
+
# FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2414
|
+
self._message_repository,
|
2415
|
+
has_bulk_parent=False,
|
2416
|
+
# FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
|
2417
|
+
),
|
2418
|
+
stream_slicer=stream_slicer,
|
2419
|
+
config=config,
|
2420
|
+
parameters=model.parameters or {},
|
2421
|
+
)
|
2422
|
+
|
2423
|
+
return AsyncRetriever(
|
2424
|
+
record_selector=record_selector,
|
2425
|
+
stream_slicer=async_job_partition_router,
|
2426
|
+
config=config,
|
2427
|
+
parameters=model.parameters or {},
|
2428
|
+
)
|
2429
|
+
|
1010
2430
|
@staticmethod
|
1011
2431
|
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
|
1012
2432
|
return Spec(
|
@@ -1023,19 +2443,28 @@ class ModelToComponentFactory:
|
|
1023
2443
|
if model.parent_stream_configs:
|
1024
2444
|
parent_stream_configs.extend(
|
1025
2445
|
[
|
1026
|
-
self._create_message_repository_substream_wrapper(
|
2446
|
+
self._create_message_repository_substream_wrapper(
|
2447
|
+
model=parent_stream_config, config=config
|
2448
|
+
)
|
1027
2449
|
for parent_stream_config in model.parent_stream_configs
|
1028
2450
|
]
|
1029
2451
|
)
|
1030
2452
|
|
1031
|
-
return SubstreamPartitionRouter(
|
2453
|
+
return SubstreamPartitionRouter(
|
2454
|
+
parent_stream_configs=parent_stream_configs,
|
2455
|
+
parameters=model.parameters or {},
|
2456
|
+
config=config,
|
2457
|
+
)
|
1032
2458
|
|
1033
|
-
def _create_message_repository_substream_wrapper(
|
2459
|
+
def _create_message_repository_substream_wrapper(
|
2460
|
+
self, model: ParentStreamConfigModel, config: Config
|
2461
|
+
) -> Any:
|
1034
2462
|
substream_factory = ModelToComponentFactory(
|
1035
2463
|
limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
|
1036
2464
|
limit_slices_fetched=self._limit_slices_fetched,
|
1037
2465
|
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
1038
2466
|
disable_retries=self._disable_retries,
|
2467
|
+
disable_cache=self._disable_cache,
|
1039
2468
|
message_repository=LogAppenderMessageRepositoryDecorator(
|
1040
2469
|
{"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
|
1041
2470
|
self._message_repository,
|
@@ -1045,15 +2474,29 @@ class ModelToComponentFactory:
|
|
1045
2474
|
return substream_factory._create_component_from_model(model=model, config=config)
|
1046
2475
|
|
1047
2476
|
@staticmethod
|
1048
|
-
def create_wait_time_from_header(
|
1049
|
-
|
2477
|
+
def create_wait_time_from_header(
|
2478
|
+
model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
|
2479
|
+
) -> WaitTimeFromHeaderBackoffStrategy:
|
2480
|
+
return WaitTimeFromHeaderBackoffStrategy(
|
2481
|
+
header=model.header,
|
2482
|
+
parameters=model.parameters or {},
|
2483
|
+
config=config,
|
2484
|
+
regex=model.regex,
|
2485
|
+
max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
|
2486
|
+
if model.max_waiting_time_in_seconds is not None
|
2487
|
+
else None,
|
2488
|
+
)
|
1050
2489
|
|
1051
2490
|
@staticmethod
|
1052
2491
|
def create_wait_until_time_from_header(
|
1053
2492
|
model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
|
1054
2493
|
) -> WaitUntilTimeFromHeaderBackoffStrategy:
|
1055
2494
|
return WaitUntilTimeFromHeaderBackoffStrategy(
|
1056
|
-
header=model.header,
|
2495
|
+
header=model.header,
|
2496
|
+
parameters=model.parameters or {},
|
2497
|
+
config=config,
|
2498
|
+
min_wait=model.min_wait,
|
2499
|
+
regex=model.regex,
|
1057
2500
|
)
|
1058
2501
|
|
1059
2502
|
def get_message_repository(self) -> MessageRepository:
|
@@ -1061,3 +2504,94 @@ class ModelToComponentFactory:
|
|
1061
2504
|
|
1062
2505
|
def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
|
1063
2506
|
return Level.DEBUG if emit_connector_builder_messages else Level.INFO
|
2507
|
+
|
2508
|
+
@staticmethod
|
2509
|
+
def create_components_mapping_definition(
|
2510
|
+
model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
|
2511
|
+
) -> ComponentMappingDefinition:
|
2512
|
+
interpolated_value = InterpolatedString.create(
|
2513
|
+
model.value, parameters=model.parameters or {}
|
2514
|
+
)
|
2515
|
+
field_path = [
|
2516
|
+
InterpolatedString.create(path, parameters=model.parameters or {})
|
2517
|
+
for path in model.field_path
|
2518
|
+
]
|
2519
|
+
return ComponentMappingDefinition(
|
2520
|
+
field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString
|
2521
|
+
value=interpolated_value,
|
2522
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
|
2523
|
+
parameters=model.parameters or {},
|
2524
|
+
)
|
2525
|
+
|
2526
|
+
def create_http_components_resolver(
|
2527
|
+
self, model: HttpComponentsResolverModel, config: Config
|
2528
|
+
) -> Any:
|
2529
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
2530
|
+
combined_slicers = self._build_resumable_cursor_from_paginator(
|
2531
|
+
model.retriever, stream_slicer
|
2532
|
+
)
|
2533
|
+
|
2534
|
+
retriever = self._create_component_from_model(
|
2535
|
+
model=model.retriever,
|
2536
|
+
config=config,
|
2537
|
+
name="",
|
2538
|
+
primary_key=None,
|
2539
|
+
stream_slicer=stream_slicer if stream_slicer else combined_slicers,
|
2540
|
+
transformations=[],
|
2541
|
+
)
|
2542
|
+
|
2543
|
+
components_mapping = [
|
2544
|
+
self._create_component_from_model(
|
2545
|
+
model=components_mapping_definition_model,
|
2546
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
2547
|
+
components_mapping_definition_model.value_type
|
2548
|
+
),
|
2549
|
+
config=config,
|
2550
|
+
)
|
2551
|
+
for components_mapping_definition_model in model.components_mapping
|
2552
|
+
]
|
2553
|
+
|
2554
|
+
return HttpComponentsResolver(
|
2555
|
+
retriever=retriever,
|
2556
|
+
config=config,
|
2557
|
+
components_mapping=components_mapping,
|
2558
|
+
parameters=model.parameters or {},
|
2559
|
+
)
|
2560
|
+
|
2561
|
+
@staticmethod
|
2562
|
+
def create_stream_config(
|
2563
|
+
model: StreamConfigModel, config: Config, **kwargs: Any
|
2564
|
+
) -> StreamConfig:
|
2565
|
+
model_configs_pointer: List[Union[InterpolatedString, str]] = (
|
2566
|
+
[x for x in model.configs_pointer] if model.configs_pointer else []
|
2567
|
+
)
|
2568
|
+
|
2569
|
+
return StreamConfig(
|
2570
|
+
configs_pointer=model_configs_pointer,
|
2571
|
+
parameters=model.parameters or {},
|
2572
|
+
)
|
2573
|
+
|
2574
|
+
def create_config_components_resolver(
|
2575
|
+
self, model: ConfigComponentsResolverModel, config: Config
|
2576
|
+
) -> Any:
|
2577
|
+
stream_config = self._create_component_from_model(
|
2578
|
+
model.stream_config, config=config, parameters=model.parameters or {}
|
2579
|
+
)
|
2580
|
+
|
2581
|
+
components_mapping = [
|
2582
|
+
self._create_component_from_model(
|
2583
|
+
model=components_mapping_definition_model,
|
2584
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
2585
|
+
components_mapping_definition_model.value_type
|
2586
|
+
),
|
2587
|
+
config=config,
|
2588
|
+
)
|
2589
|
+
for components_mapping_definition_model in model.components_mapping
|
2590
|
+
]
|
2591
|
+
|
2592
|
+
return ConfigComponentsResolver(
|
2593
|
+
stream_config=stream_config,
|
2594
|
+
config=config,
|
2595
|
+
components_mapping=components_mapping,
|
2596
|
+
parameters=model.parameters or {},
|
2597
|
+
)
|